diff options
author | Aingara Paramakuru <aparamakuru@nvidia.com> | 2014-05-05 21:14:22 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:11:01 -0400 |
commit | 1fd722f592c2e0523c5e399a2406a4e387057188 (patch) | |
tree | 3425fb1a08ec2ccc6397e39c73a5579117e00a05 | |
parent | 69e0cd3dfd8f39bc8d3529325001dcacd774f669 (diff) |
gpu: nvgpu: support gk20a virtualization
The nvgpu driver now supports the Tegra graphics virtualization
interfaces, enabling gk20a to operate in a virtualized environment.
Bug 1509608
Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
24 files changed, 2576 insertions, 101 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 0fb6090a..6544b315 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -5,3 +5,4 @@ ccflags-y += -Werror | |||
5 | 5 | ||
6 | obj-$(CONFIG_GK20A) += gk20a/ | 6 | obj-$(CONFIG_GK20A) += gk20a/ |
7 | obj-$(CONFIG_GK20A) += gm20b/ | 7 | obj-$(CONFIG_GK20A) += gm20b/ |
8 | obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += vgpu/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile index aa9237b4..fbc9cbec 100644 --- a/drivers/gpu/nvgpu/gk20a/Makefile +++ b/drivers/gpu/nvgpu/gk20a/Makefile | |||
@@ -39,5 +39,6 @@ nvgpu-y := \ | |||
39 | tsg_gk20a.o | 39 | tsg_gk20a.o |
40 | nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o | 40 | nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o |
41 | nvgpu-$(CONFIG_SYNC) += sync_gk20a.o | 41 | nvgpu-$(CONFIG_SYNC) += sync_gk20a.o |
42 | nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += platform_vgpu_tegra.o | ||
42 | 43 | ||
43 | obj-$(CONFIG_GK20A) := nvgpu.o | 44 | obj-$(CONFIG_GK20A) := nvgpu.o |
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 4849dbd5..1a1ca8ff 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -40,6 +40,7 @@ static void release_as_share_id(struct gk20a_as *as, int id) | |||
40 | static int gk20a_as_alloc_share(struct gk20a_as *as, | 40 | static int gk20a_as_alloc_share(struct gk20a_as *as, |
41 | struct gk20a_as_share **out) | 41 | struct gk20a_as_share **out) |
42 | { | 42 | { |
43 | struct gk20a *g = gk20a_from_as(as); | ||
43 | struct gk20a_as_share *as_share; | 44 | struct gk20a_as_share *as_share; |
44 | int err = 0; | 45 | int err = 0; |
45 | 46 | ||
@@ -55,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as, | |||
55 | as_share->ref_cnt.counter = 1; | 56 | as_share->ref_cnt.counter = 1; |
56 | 57 | ||
57 | /* this will set as_share->vm. */ | 58 | /* this will set as_share->vm. */ |
58 | err = gk20a_vm_alloc_share(as_share); | 59 | err = g->ops.mm.vm_alloc_share(as_share); |
59 | if (err) | 60 | if (err) |
60 | goto failed; | 61 | goto failed; |
61 | 62 | ||
@@ -106,7 +107,7 @@ static int gk20a_as_ioctl_bind_channel( | |||
106 | atomic_inc(&as_share->ref_cnt); | 107 | atomic_inc(&as_share->ref_cnt); |
107 | 108 | ||
108 | /* this will set channel_gk20a->vm */ | 109 | /* this will set channel_gk20a->vm */ |
109 | err = gk20a_vm_bind_channel(as_share, ch); | 110 | err = ch->g->ops.mm.vm_bind_channel(as_share, ch); |
110 | if (err) { | 111 | if (err) { |
111 | atomic_dec(&as_share->ref_cnt); | 112 | atomic_dec(&as_share->ref_cnt); |
112 | return err; | 113 | return err; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 45757884..669ec294 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -56,16 +56,9 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); | |||
56 | 56 | ||
57 | static int channel_gk20a_commit_userd(struct channel_gk20a *c); | 57 | static int channel_gk20a_commit_userd(struct channel_gk20a *c); |
58 | static int channel_gk20a_setup_userd(struct channel_gk20a *c); | 58 | static int channel_gk20a_setup_userd(struct channel_gk20a *c); |
59 | static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | ||
60 | u64 gpfifo_base, u32 gpfifo_entries); | ||
61 | 59 | ||
62 | static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a); | 60 | static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a); |
63 | 61 | ||
64 | static int channel_gk20a_alloc_inst(struct gk20a *g, | ||
65 | struct channel_gk20a *ch); | ||
66 | static void channel_gk20a_free_inst(struct gk20a *g, | ||
67 | struct channel_gk20a *ch); | ||
68 | |||
69 | static int channel_gk20a_update_runlist(struct channel_gk20a *c, | 62 | static int channel_gk20a_update_runlist(struct channel_gk20a *c, |
70 | bool add); | 63 | bool add); |
71 | static void gk20a_free_error_notifiers(struct channel_gk20a *ch); | 64 | static void gk20a_free_error_notifiers(struct channel_gk20a *ch); |
@@ -173,12 +166,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | |||
173 | return -ENOMEM; | 166 | return -ENOMEM; |
174 | 167 | ||
175 | /* disable channel */ | 168 | /* disable channel */ |
176 | gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), | 169 | c->g->ops.fifo.disable_channel(c); |
177 | gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | | ||
178 | ccsr_channel_enable_clr_true_f()); | ||
179 | 170 | ||
180 | /* preempt the channel */ | 171 | /* preempt the channel */ |
181 | WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid)); | 172 | WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); |
182 | 173 | ||
183 | /* value field is 8 bits long */ | 174 | /* value field is 8 bits long */ |
184 | while (value >= 1 << 8) { | 175 | while (value >= 1 << 8) { |
@@ -206,8 +197,8 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | |||
206 | return 0; | 197 | return 0; |
207 | } | 198 | } |
208 | 199 | ||
209 | static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | 200 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, |
210 | u64 gpfifo_base, u32 gpfifo_entries) | 201 | u64 gpfifo_base, u32 gpfifo_entries) |
211 | { | 202 | { |
212 | void *inst_ptr; | 203 | void *inst_ptr; |
213 | 204 | ||
@@ -269,7 +260,7 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | |||
269 | 260 | ||
270 | gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); | 261 | gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); |
271 | 262 | ||
272 | return 0; | 263 | return channel_gk20a_commit_userd(c); |
273 | } | 264 | } |
274 | 265 | ||
275 | static int channel_gk20a_setup_userd(struct channel_gk20a *c) | 266 | static int channel_gk20a_setup_userd(struct channel_gk20a *c) |
@@ -347,8 +338,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) | |||
347 | } | 338 | } |
348 | } | 339 | } |
349 | 340 | ||
350 | static int channel_gk20a_alloc_inst(struct gk20a *g, | 341 | int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) |
351 | struct channel_gk20a *ch) | ||
352 | { | 342 | { |
353 | struct device *d = dev_from_gk20a(g); | 343 | struct device *d = dev_from_gk20a(g); |
354 | int err = 0; | 344 | int err = 0; |
@@ -384,12 +374,11 @@ static int channel_gk20a_alloc_inst(struct gk20a *g, | |||
384 | 374 | ||
385 | clean_up: | 375 | clean_up: |
386 | gk20a_err(d, "fail"); | 376 | gk20a_err(d, "fail"); |
387 | channel_gk20a_free_inst(g, ch); | 377 | g->ops.fifo.free_inst(g, ch); |
388 | return err; | 378 | return err; |
389 | } | 379 | } |
390 | 380 | ||
391 | static void channel_gk20a_free_inst(struct gk20a *g, | 381 | void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch) |
392 | struct channel_gk20a *ch) | ||
393 | { | 382 | { |
394 | struct device *d = dev_from_gk20a(g); | 383 | struct device *d = dev_from_gk20a(g); |
395 | 384 | ||
@@ -403,7 +392,16 @@ static void channel_gk20a_free_inst(struct gk20a *g, | |||
403 | 392 | ||
404 | static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) | 393 | static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) |
405 | { | 394 | { |
406 | return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true); | 395 | return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true); |
396 | } | ||
397 | |||
398 | void channel_gk20a_disable(struct channel_gk20a *ch) | ||
399 | { | ||
400 | /* disable channel */ | ||
401 | gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), | ||
402 | gk20a_readl(ch->g, | ||
403 | ccsr_channel_r(ch->hw_chid)) | | ||
404 | ccsr_channel_enable_clr_true_f()); | ||
407 | } | 405 | } |
408 | 406 | ||
409 | void gk20a_channel_abort(struct channel_gk20a *ch) | 407 | void gk20a_channel_abort(struct channel_gk20a *ch) |
@@ -426,11 +424,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch) | |||
426 | } | 424 | } |
427 | mutex_unlock(&ch->jobs_lock); | 425 | mutex_unlock(&ch->jobs_lock); |
428 | 426 | ||
429 | /* disable channel */ | 427 | ch->g->ops.fifo.disable_channel(ch); |
430 | gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), | ||
431 | gk20a_readl(ch->g, | ||
432 | ccsr_channel_r(ch->hw_chid)) | | ||
433 | ccsr_channel_enable_clr_true_f()); | ||
434 | 428 | ||
435 | if (released_job_semaphore) { | 429 | if (released_job_semaphore) { |
436 | wake_up_interruptible_all(&ch->semaphore_wq); | 430 | wake_up_interruptible_all(&ch->semaphore_wq); |
@@ -479,7 +473,7 @@ void gk20a_disable_channel(struct channel_gk20a *ch, | |||
479 | gk20a_wait_channel_idle(ch); | 473 | gk20a_wait_channel_idle(ch); |
480 | 474 | ||
481 | /* preempt the channel */ | 475 | /* preempt the channel */ |
482 | gk20a_fifo_preempt_channel(ch->g, ch->hw_chid); | 476 | ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid); |
483 | 477 | ||
484 | /* remove channel from runlist */ | 478 | /* remove channel from runlist */ |
485 | channel_gk20a_update_runlist(ch, false); | 479 | channel_gk20a_update_runlist(ch, false); |
@@ -643,7 +637,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish) | |||
643 | gk20a_free_error_notifiers(ch); | 637 | gk20a_free_error_notifiers(ch); |
644 | 638 | ||
645 | /* release channel ctx */ | 639 | /* release channel ctx */ |
646 | gk20a_free_channel_ctx(ch); | 640 | g->ops.gr.free_channel_ctx(ch); |
647 | 641 | ||
648 | gk20a_gr_flush_channel_tlb(gr); | 642 | gk20a_gr_flush_channel_tlb(gr); |
649 | 643 | ||
@@ -683,8 +677,8 @@ unbind: | |||
683 | if (gk20a_is_channel_marked_as_tsg(ch)) | 677 | if (gk20a_is_channel_marked_as_tsg(ch)) |
684 | gk20a_tsg_unbind_channel(ch); | 678 | gk20a_tsg_unbind_channel(ch); |
685 | 679 | ||
686 | channel_gk20a_unbind(ch); | 680 | g->ops.fifo.unbind_channel(ch); |
687 | channel_gk20a_free_inst(g, ch); | 681 | g->ops.fifo.free_inst(g, ch); |
688 | 682 | ||
689 | ch->vpr = false; | 683 | ch->vpr = false; |
690 | ch->vm = NULL; | 684 | ch->vm = NULL; |
@@ -747,7 +741,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) | |||
747 | 741 | ||
748 | ch->g = g; | 742 | ch->g = g; |
749 | 743 | ||
750 | if (channel_gk20a_alloc_inst(g, ch)) { | 744 | if (g->ops.fifo.alloc_inst(g, ch)) { |
751 | ch->in_use = false; | 745 | ch->in_use = false; |
752 | gk20a_err(dev_from_gk20a(g), | 746 | gk20a_err(dev_from_gk20a(g), |
753 | "failed to open gk20a channel, out of inst mem"); | 747 | "failed to open gk20a channel, out of inst mem"); |
@@ -1097,7 +1091,6 @@ static void recycle_priv_cmdbuf(struct channel_gk20a *c) | |||
1097 | gk20a_dbg_fn("done"); | 1091 | gk20a_dbg_fn("done"); |
1098 | } | 1092 | } |
1099 | 1093 | ||
1100 | |||
1101 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 1094 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
1102 | struct nvhost_alloc_gpfifo_args *args) | 1095 | struct nvhost_alloc_gpfifo_args *args) |
1103 | { | 1096 | { |
@@ -1181,10 +1174,11 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1181 | gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", | 1174 | gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", |
1182 | c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num); | 1175 | c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num); |
1183 | 1176 | ||
1184 | channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num); | ||
1185 | |||
1186 | channel_gk20a_setup_userd(c); | 1177 | channel_gk20a_setup_userd(c); |
1187 | channel_gk20a_commit_userd(c); | 1178 | |
1179 | err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num); | ||
1180 | if (err) | ||
1181 | goto clean_up_unmap; | ||
1188 | 1182 | ||
1189 | /* TBD: setup engine contexts */ | 1183 | /* TBD: setup engine contexts */ |
1190 | 1184 | ||
@@ -1550,7 +1544,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1550 | /* We don't know what context is currently running... */ | 1544 | /* We don't know what context is currently running... */ |
1551 | /* Note also: there can be more than one context associated with the */ | 1545 | /* Note also: there can be more than one context associated with the */ |
1552 | /* address space (vm). */ | 1546 | /* address space (vm). */ |
1553 | gk20a_mm_tlb_invalidate(c->vm); | 1547 | g->ops.mm.tlb_invalidate(c->vm); |
1554 | 1548 | ||
1555 | /* Make sure we have enough space for gpfifo entries. If not, | 1549 | /* Make sure we have enough space for gpfifo entries. If not, |
1556 | * wait for signals from completed submits */ | 1550 | * wait for signals from completed submits */ |
@@ -1929,7 +1923,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, | |||
1929 | 1923 | ||
1930 | gk20a_dbg_fn(""); | 1924 | gk20a_dbg_fn(""); |
1931 | 1925 | ||
1932 | return gr_gk20a_bind_ctxsw_zcull(g, gr, ch, | 1926 | return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, |
1933 | args->gpu_va, args->mode); | 1927 | args->gpu_va, args->mode); |
1934 | } | 1928 | } |
1935 | 1929 | ||
@@ -1945,7 +1939,7 @@ int gk20a_channel_suspend(struct gk20a *g) | |||
1945 | gk20a_dbg_fn(""); | 1939 | gk20a_dbg_fn(""); |
1946 | 1940 | ||
1947 | /* wait for engine idle */ | 1941 | /* wait for engine idle */ |
1948 | err = gk20a_fifo_wait_engine_idle(g); | 1942 | err = g->ops.fifo.wait_engine_idle(g); |
1949 | if (err) | 1943 | if (err) |
1950 | return err; | 1944 | return err; |
1951 | 1945 | ||
@@ -1954,22 +1948,20 @@ int gk20a_channel_suspend(struct gk20a *g) | |||
1954 | 1948 | ||
1955 | gk20a_dbg_info("suspend channel %d", chid); | 1949 | gk20a_dbg_info("suspend channel %d", chid); |
1956 | /* disable channel */ | 1950 | /* disable channel */ |
1957 | gk20a_writel(g, ccsr_channel_r(chid), | 1951 | g->ops.fifo.disable_channel(&f->channel[chid]); |
1958 | gk20a_readl(g, ccsr_channel_r(chid)) | | ||
1959 | ccsr_channel_enable_clr_true_f()); | ||
1960 | /* preempt the channel */ | 1952 | /* preempt the channel */ |
1961 | gk20a_fifo_preempt_channel(g, chid); | 1953 | g->ops.fifo.preempt_channel(g, chid); |
1962 | 1954 | ||
1963 | channels_in_use = true; | 1955 | channels_in_use = true; |
1964 | } | 1956 | } |
1965 | } | 1957 | } |
1966 | 1958 | ||
1967 | if (channels_in_use) { | 1959 | if (channels_in_use) { |
1968 | gk20a_fifo_update_runlist(g, 0, ~0, false, true); | 1960 | g->ops.fifo.update_runlist(g, 0, ~0, false, true); |
1969 | 1961 | ||
1970 | for (chid = 0; chid < f->num_channels; chid++) { | 1962 | for (chid = 0; chid < f->num_channels; chid++) { |
1971 | if (f->channel[chid].in_use) | 1963 | if (f->channel[chid].in_use) |
1972 | channel_gk20a_unbind(&f->channel[chid]); | 1964 | g->ops.fifo.unbind_channel(&f->channel[chid]); |
1973 | } | 1965 | } |
1974 | } | 1966 | } |
1975 | 1967 | ||
@@ -1996,7 +1988,7 @@ int gk20a_channel_resume(struct gk20a *g) | |||
1996 | } | 1988 | } |
1997 | 1989 | ||
1998 | if (channels_in_use) | 1990 | if (channels_in_use) |
1999 | gk20a_fifo_update_runlist(g, 0, ~0, true, true); | 1991 | g->ops.fifo.update_runlist(g, 0, ~0, true, true); |
2000 | 1992 | ||
2001 | gk20a_dbg_fn("done"); | 1993 | gk20a_dbg_fn("done"); |
2002 | return 0; | 1994 | return 0; |
@@ -2074,6 +2066,11 @@ clean_up: | |||
2074 | void gk20a_init_channel(struct gpu_ops *gops) | 2066 | void gk20a_init_channel(struct gpu_ops *gops) |
2075 | { | 2067 | { |
2076 | gops->fifo.bind_channel = channel_gk20a_bind; | 2068 | gops->fifo.bind_channel = channel_gk20a_bind; |
2069 | gops->fifo.unbind_channel = channel_gk20a_unbind; | ||
2070 | gops->fifo.disable_channel = channel_gk20a_disable; | ||
2071 | gops->fifo.alloc_inst = channel_gk20a_alloc_inst; | ||
2072 | gops->fifo.free_inst = channel_gk20a_free_inst; | ||
2073 | gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; | ||
2077 | } | 2074 | } |
2078 | 2075 | ||
2079 | long gk20a_channel_ioctl(struct file *filp, | 2076 | long gk20a_channel_ioctl(struct file *filp, |
@@ -2144,7 +2141,7 @@ long gk20a_channel_ioctl(struct file *filp, | |||
2144 | __func__, cmd); | 2141 | __func__, cmd); |
2145 | return err; | 2142 | return err; |
2146 | } | 2143 | } |
2147 | err = gk20a_alloc_obj_ctx(ch, | 2144 | err = ch->g->ops.gr.alloc_obj_ctx(ch, |
2148 | (struct nvhost_alloc_obj_ctx_args *)buf); | 2145 | (struct nvhost_alloc_obj_ctx_args *)buf); |
2149 | gk20a_idle(dev); | 2146 | gk20a_idle(dev); |
2150 | break; | 2147 | break; |
@@ -2156,7 +2153,7 @@ long gk20a_channel_ioctl(struct file *filp, | |||
2156 | __func__, cmd); | 2153 | __func__, cmd); |
2157 | return err; | 2154 | return err; |
2158 | } | 2155 | } |
2159 | err = gk20a_free_obj_ctx(ch, | 2156 | err = ch->g->ops.gr.free_obj_ctx(ch, |
2160 | (struct nvhost_free_obj_ctx_args *)buf); | 2157 | (struct nvhost_free_obj_ctx_args *)buf); |
2161 | gk20a_idle(dev); | 2158 | gk20a_idle(dev); |
2162 | break; | 2159 | break; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 2ea3eccb..37ca8244 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -144,6 +144,10 @@ struct channel_gk20a { | |||
144 | void *error_notifier_va; | 144 | void *error_notifier_va; |
145 | 145 | ||
146 | struct gk20a_channel_sync *sync; | 146 | struct gk20a_channel_sync *sync; |
147 | |||
148 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
149 | u64 virt_ctx; | ||
150 | #endif | ||
147 | }; | 151 | }; |
148 | 152 | ||
149 | static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) | 153 | static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) |
@@ -193,4 +197,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
193 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 197 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
194 | struct nvhost_alloc_gpfifo_args *args); | 198 | struct nvhost_alloc_gpfifo_args *args); |
195 | 199 | ||
200 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); | ||
201 | void channel_gk20a_disable(struct channel_gk20a *ch); | ||
202 | int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch); | ||
203 | void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch); | ||
204 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | ||
205 | u64 gpfifo_base, u32 gpfifo_entries); | ||
206 | |||
196 | #endif /*__CHANNEL_GK20A_H__*/ | 207 | #endif /*__CHANNEL_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index e5628c3f..7338f842 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -158,6 +158,9 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
158 | struct zbc_entry *zbc_val; | 158 | struct zbc_entry *zbc_val; |
159 | struct zbc_query_params *zbc_tbl; | 159 | struct zbc_query_params *zbc_tbl; |
160 | int i, err = 0; | 160 | int i, err = 0; |
161 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
162 | struct gk20a_platform *platform = platform_get_drvdata(dev); | ||
163 | #endif | ||
161 | 164 | ||
162 | gk20a_dbg_fn(""); | 165 | gk20a_dbg_fn(""); |
163 | 166 | ||
@@ -197,7 +200,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
197 | if (zcull_info == NULL) | 200 | if (zcull_info == NULL) |
198 | return -ENOMEM; | 201 | return -ENOMEM; |
199 | 202 | ||
200 | err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info); | 203 | err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); |
201 | if (err) { | 204 | if (err) { |
202 | kfree(zcull_info); | 205 | kfree(zcull_info); |
203 | break; | 206 | break; |
@@ -219,6 +222,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
219 | case NVHOST_GPU_IOCTL_ZBC_SET_TABLE: | 222 | case NVHOST_GPU_IOCTL_ZBC_SET_TABLE: |
220 | set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf; | 223 | set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf; |
221 | 224 | ||
225 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
226 | if (platform->virtual_dev) | ||
227 | return -ENOMEM; | ||
228 | #endif | ||
229 | |||
222 | zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL); | 230 | zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL); |
223 | if (zbc_val == NULL) | 231 | if (zbc_val == NULL) |
224 | return -ENOMEM; | 232 | return -ENOMEM; |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 4363129d..e6b3fd5f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -1173,7 +1173,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) | |||
1173 | 1173 | ||
1174 | gk20a_channel_abort(ch); | 1174 | gk20a_channel_abort(ch); |
1175 | for (i = 0; i < g->fifo.max_runlists; i++) | 1175 | for (i = 0; i < g->fifo.max_runlists; i++) |
1176 | gk20a_fifo_update_runlist(g, i, | 1176 | g->ops.fifo.update_runlist(g, i, |
1177 | hw_chid, false, false); | 1177 | hw_chid, false, false); |
1178 | 1178 | ||
1179 | if (gk20a_fifo_set_ctx_mmu_error(g, ch)) | 1179 | if (gk20a_fifo_set_ctx_mmu_error(g, ch)) |
@@ -1620,7 +1620,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g, | |||
1620 | pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat); | 1620 | pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat); |
1621 | 1621 | ||
1622 | if (pbdma_chid != ~0) { | 1622 | if (pbdma_chid != ~0) { |
1623 | err = gk20a_fifo_preempt_channel(g, pbdma_chid); | 1623 | err = g->ops.fifo.preempt_channel(g, pbdma_chid); |
1624 | if (err) | 1624 | if (err) |
1625 | goto clean_up; | 1625 | goto clean_up; |
1626 | } | 1626 | } |
@@ -1636,7 +1636,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g, | |||
1636 | engine_chid = fifo_engine_status_next_id_v(eng_stat); | 1636 | engine_chid = fifo_engine_status_next_id_v(eng_stat); |
1637 | 1637 | ||
1638 | if (engine_chid != ~0 && engine_chid != pbdma_chid) { | 1638 | if (engine_chid != ~0 && engine_chid != pbdma_chid) { |
1639 | err = gk20a_fifo_preempt_channel(g, engine_chid); | 1639 | err = g->ops.fifo.preempt_channel(g, engine_chid); |
1640 | if (err) | 1640 | if (err) |
1641 | goto clean_up; | 1641 | goto clean_up; |
1642 | } | 1642 | } |
@@ -1960,6 +1960,9 @@ static void gk20a_fifo_apply_pb_timeout(struct gk20a *g) | |||
1960 | void gk20a_init_fifo(struct gpu_ops *gops) | 1960 | void gk20a_init_fifo(struct gpu_ops *gops) |
1961 | { | 1961 | { |
1962 | gk20a_init_channel(gops); | 1962 | gk20a_init_channel(gops); |
1963 | gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; | ||
1964 | gops->fifo.update_runlist = gk20a_fifo_update_runlist; | ||
1963 | gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault; | 1965 | gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault; |
1964 | gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout; | 1966 | gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout; |
1967 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; | ||
1965 | } | 1968 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 0816878a..3499cc89 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/tegra-powergate.h> | 40 | #include <linux/tegra-powergate.h> |
41 | #include <linux/tegra_pm_domains.h> | 41 | #include <linux/tegra_pm_domains.h> |
42 | #include <linux/clk/tegra.h> | 42 | #include <linux/clk/tegra.h> |
43 | #include <linux/kthread.h> | ||
43 | 44 | ||
44 | #include <linux/sched.h> | 45 | #include <linux/sched.h> |
45 | #include <linux/input-cfboost.h> | 46 | #include <linux/input-cfboost.h> |
@@ -57,6 +58,9 @@ | |||
57 | #include "dbg_gpu_gk20a.h" | 58 | #include "dbg_gpu_gk20a.h" |
58 | #include "hal.h" | 59 | #include "hal.h" |
59 | #include "nvhost_acm.h" | 60 | #include "nvhost_acm.h" |
61 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
62 | #include "vgpu/vgpu.h" | ||
63 | #endif | ||
60 | 64 | ||
61 | #define CREATE_TRACE_POINTS | 65 | #define CREATE_TRACE_POINTS |
62 | #include <trace/events/gk20a.h> | 66 | #include <trace/events/gk20a.h> |
@@ -737,6 +741,17 @@ static int gk20a_init_client(struct platform_device *dev) | |||
737 | 741 | ||
738 | gk20a_dbg_fn(""); | 742 | gk20a_dbg_fn(""); |
739 | 743 | ||
744 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
745 | { | ||
746 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
747 | |||
748 | if (platform->virtual_dev) { | ||
749 | err = vgpu_pm_finalize_poweron(&dev->dev); | ||
750 | if (err) | ||
751 | return err; | ||
752 | } | ||
753 | } | ||
754 | #endif | ||
740 | #ifndef CONFIG_PM_RUNTIME | 755 | #ifndef CONFIG_PM_RUNTIME |
741 | gk20a_pm_finalize_poweron(&dev->dev); | 756 | gk20a_pm_finalize_poweron(&dev->dev); |
742 | #endif | 757 | #endif |
@@ -753,6 +768,16 @@ static int gk20a_init_client(struct platform_device *dev) | |||
753 | static void gk20a_deinit_client(struct platform_device *dev) | 768 | static void gk20a_deinit_client(struct platform_device *dev) |
754 | { | 769 | { |
755 | gk20a_dbg_fn(""); | 770 | gk20a_dbg_fn(""); |
771 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
772 | { | ||
773 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
774 | |||
775 | if (platform->virtual_dev) { | ||
776 | vgpu_pm_prepare_poweroff(&dev->dev); | ||
777 | return; | ||
778 | } | ||
779 | } | ||
780 | #endif | ||
756 | #ifndef CONFIG_PM_RUNTIME | 781 | #ifndef CONFIG_PM_RUNTIME |
757 | gk20a_pm_prepare_poweroff(&dev->dev); | 782 | gk20a_pm_prepare_poweroff(&dev->dev); |
758 | #endif | 783 | #endif |
@@ -1006,6 +1031,10 @@ static struct of_device_id tegra_gk20a_of_match[] = { | |||
1006 | .data = &gk20a_tegra_platform }, | 1031 | .data = &gk20a_tegra_platform }, |
1007 | { .compatible = "nvidia,tegra210-gm20b", | 1032 | { .compatible = "nvidia,tegra210-gm20b", |
1008 | .data = &gm20b_tegra_platform }, | 1033 | .data = &gm20b_tegra_platform }, |
1034 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
1035 | { .compatible = "nvidia,tegra124-gk20a-vgpu", | ||
1036 | .data = &vgpu_tegra_platform }, | ||
1037 | #endif | ||
1009 | #else | 1038 | #else |
1010 | { .compatible = "nvidia,tegra124-gk20a", | 1039 | { .compatible = "nvidia,tegra124-gk20a", |
1011 | .data = &gk20a_generic_platform }, | 1040 | .data = &gk20a_generic_platform }, |
@@ -1057,7 +1086,7 @@ static int gk20a_create_device( | |||
1057 | return 0; | 1086 | return 0; |
1058 | } | 1087 | } |
1059 | 1088 | ||
1060 | static void gk20a_user_deinit(struct platform_device *dev) | 1089 | void gk20a_user_deinit(struct platform_device *dev) |
1061 | { | 1090 | { |
1062 | struct gk20a *g = get_gk20a(dev); | 1091 | struct gk20a *g = get_gk20a(dev); |
1063 | 1092 | ||
@@ -1098,7 +1127,7 @@ static void gk20a_user_deinit(struct platform_device *dev) | |||
1098 | class_destroy(g->class); | 1127 | class_destroy(g->class); |
1099 | } | 1128 | } |
1100 | 1129 | ||
1101 | static int gk20a_user_init(struct platform_device *dev) | 1130 | int gk20a_user_init(struct platform_device *dev) |
1102 | { | 1131 | { |
1103 | int err; | 1132 | int err; |
1104 | dev_t devno; | 1133 | dev_t devno; |
@@ -1403,6 +1432,11 @@ static int gk20a_probe(struct platform_device *dev) | |||
1403 | 1432 | ||
1404 | platform_set_drvdata(dev, platform); | 1433 | platform_set_drvdata(dev, platform); |
1405 | 1434 | ||
1435 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
1436 | if (platform->virtual_dev) | ||
1437 | return vgpu_probe(dev); | ||
1438 | #endif | ||
1439 | |||
1406 | gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); | 1440 | gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); |
1407 | if (!gk20a) { | 1441 | if (!gk20a) { |
1408 | dev_err(&dev->dev, "couldn't allocate gk20a support"); | 1442 | dev_err(&dev->dev, "couldn't allocate gk20a support"); |
@@ -1546,8 +1580,16 @@ static int gk20a_probe(struct platform_device *dev) | |||
1546 | static int __exit gk20a_remove(struct platform_device *dev) | 1580 | static int __exit gk20a_remove(struct platform_device *dev) |
1547 | { | 1581 | { |
1548 | struct gk20a *g = get_gk20a(dev); | 1582 | struct gk20a *g = get_gk20a(dev); |
1583 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
1584 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
1585 | #endif | ||
1549 | gk20a_dbg_fn(""); | 1586 | gk20a_dbg_fn(""); |
1550 | 1587 | ||
1588 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
1589 | if (platform->virtual_dev) | ||
1590 | return vgpu_remove(dev); | ||
1591 | #endif | ||
1592 | |||
1551 | #ifdef CONFIG_INPUT_CFBOOST | 1593 | #ifdef CONFIG_INPUT_CFBOOST |
1552 | if (g->boost_added) | 1594 | if (g->boost_added) |
1553 | cfb_remove_device(&dev->dev); | 1595 | cfb_remove_device(&dev->dev); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a1080f0b..b813541a 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -131,6 +131,16 @@ struct gpu_ops { | |||
131 | u32 reg_offset); | 131 | u32 reg_offset); |
132 | int (*load_ctxsw_ucode)(struct gk20a *g); | 132 | int (*load_ctxsw_ucode)(struct gk20a *g); |
133 | u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); | 133 | u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); |
134 | void (*free_channel_ctx)(struct channel_gk20a *c); | ||
135 | int (*alloc_obj_ctx)(struct channel_gk20a *c, | ||
136 | struct nvhost_alloc_obj_ctx_args *args); | ||
137 | int (*free_obj_ctx)(struct channel_gk20a *c, | ||
138 | struct nvhost_free_obj_ctx_args *args); | ||
139 | int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, | ||
140 | struct channel_gk20a *c, u64 zcull_va, | ||
141 | u32 mode); | ||
142 | int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr, | ||
143 | struct gr_zcull_info *zcull_params); | ||
134 | } gr; | 144 | } gr; |
135 | const char *name; | 145 | const char *name; |
136 | struct { | 146 | struct { |
@@ -148,9 +158,20 @@ struct gpu_ops { | |||
148 | } clock_gating; | 158 | } clock_gating; |
149 | struct { | 159 | struct { |
150 | void (*bind_channel)(struct channel_gk20a *ch_gk20a); | 160 | void (*bind_channel)(struct channel_gk20a *ch_gk20a); |
161 | void (*unbind_channel)(struct channel_gk20a *ch_gk20a); | ||
162 | void (*disable_channel)(struct channel_gk20a *ch); | ||
163 | int (*alloc_inst)(struct gk20a *g, struct channel_gk20a *ch); | ||
164 | void (*free_inst)(struct gk20a *g, struct channel_gk20a *ch); | ||
165 | int (*setup_ramfc)(struct channel_gk20a *c, u64 gpfifo_base, | ||
166 | u32 gpfifo_entries); | ||
167 | int (*preempt_channel)(struct gk20a *g, u32 hw_chid); | ||
168 | int (*update_runlist)(struct gk20a *g, u32 runlist_id, | ||
169 | u32 hw_chid, bool add, | ||
170 | bool wait_for_finish); | ||
151 | void (*trigger_mmu_fault)(struct gk20a *g, | 171 | void (*trigger_mmu_fault)(struct gk20a *g, |
152 | unsigned long engine_ids); | 172 | unsigned long engine_ids); |
153 | void (*apply_pb_timeout)(struct gk20a *g); | 173 | void (*apply_pb_timeout)(struct gk20a *g); |
174 | int (*wait_engine_idle)(struct gk20a *g); | ||
154 | } fifo; | 175 | } fifo; |
155 | struct pmu_v { | 176 | struct pmu_v { |
156 | /*used for change of enum zbc update cmd id from ver 0 to ver1*/ | 177 | /*used for change of enum zbc update cmd id from ver 0 to ver1*/ |
@@ -241,6 +262,31 @@ struct gpu_ops { | |||
241 | void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr, | 262 | void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr, |
242 | u64 size, u32 pgsz_idx); | 263 | u64 size, u32 pgsz_idx); |
243 | bool (*is_debug_mode_enabled)(struct gk20a *g); | 264 | bool (*is_debug_mode_enabled)(struct gk20a *g); |
265 | u64 (*gmmu_map)(struct vm_gk20a *vm, | ||
266 | u64 map_offset, | ||
267 | struct sg_table *sgt, | ||
268 | u64 buffer_offset, | ||
269 | u64 size, | ||
270 | int pgsz_idx, | ||
271 | u8 kind_v, | ||
272 | u32 ctag_offset, | ||
273 | u32 flags, | ||
274 | int rw_flag, | ||
275 | bool clear_ctags); | ||
276 | void (*gmmu_unmap)(struct vm_gk20a *vm, | ||
277 | u64 vaddr, | ||
278 | u64 size, | ||
279 | int pgsz_idx, | ||
280 | bool va_allocated, | ||
281 | int rw_flag); | ||
282 | void (*vm_remove)(struct vm_gk20a *vm); | ||
283 | int (*vm_alloc_share)(struct gk20a_as_share *as_share); | ||
284 | int (*vm_bind_channel)(struct gk20a_as_share *as_share, | ||
285 | struct channel_gk20a *ch); | ||
286 | int (*fb_flush)(struct gk20a *g); | ||
287 | void (*l2_invalidate)(struct gk20a *g); | ||
288 | void (*l2_flush)(struct gk20a *g, bool invalidate); | ||
289 | void (*tlb_invalidate)(struct vm_gk20a *vm); | ||
244 | } mm; | 290 | } mm; |
245 | struct { | 291 | struct { |
246 | int (*prepare_ucode)(struct gk20a *g); | 292 | int (*prepare_ucode)(struct gk20a *g); |
@@ -648,4 +694,7 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name); | |||
648 | 694 | ||
649 | int gk20a_init_gpu_characteristics(struct gk20a *g); | 695 | int gk20a_init_gpu_characteristics(struct gk20a *g); |
650 | 696 | ||
697 | int gk20a_user_init(struct platform_device *dev); | ||
698 | void gk20a_user_deinit(struct platform_device *dev); | ||
699 | |||
651 | #endif /* _NVHOST_GK20A_H_ */ | 700 | #endif /* _NVHOST_GK20A_H_ */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index ef7776df..892a138e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -825,7 +825,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, | |||
825 | } | 825 | } |
826 | } | 826 | } |
827 | 827 | ||
828 | gk20a_mm_fb_flush(g); | 828 | g->ops.mm.fb_flush(g); |
829 | 829 | ||
830 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, | 830 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, |
831 | ch_ctx->zcull_ctx.ctx_sw_mode); | 831 | ch_ctx->zcull_ctx.ctx_sw_mode); |
@@ -7077,4 +7077,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
7077 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; | 7077 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; |
7078 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | 7078 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; |
7079 | gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask; | 7079 | gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask; |
7080 | gops->gr.free_channel_ctx = gk20a_free_channel_ctx; | ||
7081 | gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; | ||
7082 | gops->gr.free_obj_ctx = gk20a_free_obj_ctx; | ||
7083 | gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; | ||
7084 | gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; | ||
7080 | } | 7085 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 654938b2..3feb675b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -88,7 +88,6 @@ static inline u32 lo32(u64 f) | |||
88 | return (u32)(f & 0xffffffff); | 88 | return (u32)(f & 0xffffffff); |
89 | } | 89 | } |
90 | 90 | ||
91 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); | ||
92 | static struct mapped_buffer_node *find_mapped_buffer_locked( | 91 | static struct mapped_buffer_node *find_mapped_buffer_locked( |
93 | struct rb_root *root, u64 addr); | 92 | struct rb_root *root, u64 addr); |
94 | static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( | 93 | static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( |
@@ -100,7 +99,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
100 | u64 first_vaddr, u64 last_vaddr, | 99 | u64 first_vaddr, u64 last_vaddr, |
101 | u8 kind_v, u32 ctag_offset, bool cacheable, | 100 | u8 kind_v, u32 ctag_offset, bool cacheable, |
102 | int rw_flag); | 101 | int rw_flag); |
103 | static void gk20a_vm_remove_support(struct vm_gk20a *vm); | ||
104 | static int gk20a_init_system_vm(struct mm_gk20a *mm); | 102 | static int gk20a_init_system_vm(struct mm_gk20a *mm); |
105 | static int gk20a_init_bar1_vm(struct mm_gk20a *mm); | 103 | static int gk20a_init_bar1_vm(struct mm_gk20a *mm); |
106 | 104 | ||
@@ -335,6 +333,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
335 | gk20a_init_bar1_vm(mm); | 333 | gk20a_init_bar1_vm(mm); |
336 | gk20a_init_system_vm(mm); | 334 | gk20a_init_system_vm(mm); |
337 | 335 | ||
336 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ | ||
337 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; | ||
338 | mm->remove_support = gk20a_remove_mm_support; | 338 | mm->remove_support = gk20a_remove_mm_support; |
339 | mm->sw_ready = true; | 339 | mm->sw_ready = true; |
340 | 340 | ||
@@ -833,9 +833,9 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) | |||
833 | mutex_unlock(&vm->update_gmmu_lock); | 833 | mutex_unlock(&vm->update_gmmu_lock); |
834 | } | 834 | } |
835 | 835 | ||
836 | static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | 836 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, |
837 | u64 size, | 837 | u64 size, |
838 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | 838 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) |
839 | 839 | ||
840 | { | 840 | { |
841 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; | 841 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; |
@@ -881,9 +881,9 @@ static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | |||
881 | return offset; | 881 | return offset; |
882 | } | 882 | } |
883 | 883 | ||
884 | static int gk20a_vm_free_va(struct vm_gk20a *vm, | 884 | int gk20a_vm_free_va(struct vm_gk20a *vm, |
885 | u64 offset, u64 size, | 885 | u64 offset, u64 size, |
886 | enum gmmu_pgsz_gk20a pgsz_idx) | 886 | enum gmmu_pgsz_gk20a pgsz_idx) |
887 | { | 887 | { |
888 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; | 888 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; |
889 | u32 page_size = gmmu_page_sizes[pgsz_idx]; | 889 | u32 page_size = gmmu_page_sizes[pgsz_idx]; |
@@ -1100,21 +1100,32 @@ static int validate_fixed_buffer(struct vm_gk20a *vm, | |||
1100 | return 0; | 1100 | return 0; |
1101 | } | 1101 | } |
1102 | 1102 | ||
1103 | static u64 __locked_gmmu_map(struct vm_gk20a *vm, | 1103 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
1104 | u64 map_offset, | 1104 | u64 map_offset, |
1105 | struct sg_table *sgt, | 1105 | struct sg_table *sgt, |
1106 | u64 buffer_offset, | 1106 | u64 buffer_offset, |
1107 | u64 size, | 1107 | u64 size, |
1108 | int pgsz_idx, | 1108 | int pgsz_idx, |
1109 | u8 kind_v, | 1109 | u8 kind_v, |
1110 | u32 ctag_offset, | 1110 | u32 ctag_offset, |
1111 | u32 flags, | 1111 | u32 flags, |
1112 | int rw_flag) | 1112 | int rw_flag, |
1113 | bool clear_ctags) | ||
1113 | { | 1114 | { |
1114 | int err = 0, i = 0; | 1115 | int err = 0, i = 0; |
1115 | bool allocated = false; | 1116 | bool allocated = false; |
1116 | u32 pde_lo, pde_hi; | 1117 | u32 pde_lo, pde_hi; |
1117 | struct device *d = dev_from_vm(vm); | 1118 | struct device *d = dev_from_vm(vm); |
1119 | struct gk20a *g = gk20a_from_vm(vm); | ||
1120 | |||
1121 | if (clear_ctags && ctag_offset) { | ||
1122 | u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >> | ||
1123 | COMP_TAG_LINE_SIZE_SHIFT; | ||
1124 | |||
1125 | /* init/clear the ctag buffer */ | ||
1126 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | ||
1127 | ctag_offset, ctag_offset + ctag_lines - 1); | ||
1128 | } | ||
1118 | 1129 | ||
1119 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | 1130 | /* Allocate (or validate when map_offset != 0) the virtual address. */ |
1120 | if (!map_offset) { | 1131 | if (!map_offset) { |
@@ -1167,12 +1178,12 @@ fail_alloc: | |||
1167 | return 0; | 1178 | return 0; |
1168 | } | 1179 | } |
1169 | 1180 | ||
1170 | static void __locked_gmmu_unmap(struct vm_gk20a *vm, | 1181 | void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, |
1171 | u64 vaddr, | 1182 | u64 vaddr, |
1172 | u64 size, | 1183 | u64 size, |
1173 | int pgsz_idx, | 1184 | int pgsz_idx, |
1174 | bool va_allocated, | 1185 | bool va_allocated, |
1175 | int rw_flag) | 1186 | int rw_flag) |
1176 | { | 1187 | { |
1177 | int err = 0; | 1188 | int err = 0; |
1178 | struct gk20a *g = gk20a_from_vm(vm); | 1189 | struct gk20a *g = gk20a_from_vm(vm); |
@@ -1298,6 +1309,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1298 | struct buffer_attrs bfr = {0}; | 1309 | struct buffer_attrs bfr = {0}; |
1299 | struct gk20a_comptags comptags; | 1310 | struct gk20a_comptags comptags; |
1300 | u64 buf_addr; | 1311 | u64 buf_addr; |
1312 | bool clear_ctags = false; | ||
1301 | 1313 | ||
1302 | mutex_lock(&vm->update_gmmu_lock); | 1314 | mutex_lock(&vm->update_gmmu_lock); |
1303 | 1315 | ||
@@ -1402,11 +1414,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1402 | bfr.kind_v = bfr.uc_kind_v; | 1414 | bfr.kind_v = bfr.uc_kind_v; |
1403 | } else { | 1415 | } else { |
1404 | gk20a_get_comptags(d, dmabuf, &comptags); | 1416 | gk20a_get_comptags(d, dmabuf, &comptags); |
1405 | 1417 | clear_ctags = true; | |
1406 | /* init/clear the ctag buffer */ | ||
1407 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | ||
1408 | comptags.offset, | ||
1409 | comptags.offset + comptags.lines - 1); | ||
1410 | } | 1418 | } |
1411 | } | 1419 | } |
1412 | 1420 | ||
@@ -1414,15 +1422,15 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1414 | bfr.ctag_offset = comptags.offset; | 1422 | bfr.ctag_offset = comptags.offset; |
1415 | 1423 | ||
1416 | /* update gmmu ptes */ | 1424 | /* update gmmu ptes */ |
1417 | map_offset = __locked_gmmu_map(vm, map_offset, | 1425 | map_offset = g->ops.mm.gmmu_map(vm, map_offset, |
1418 | bfr.sgt, | 1426 | bfr.sgt, |
1419 | buffer_offset, /* sg offset */ | 1427 | buffer_offset, /* sg offset */ |
1420 | mapping_size, | 1428 | mapping_size, |
1421 | bfr.pgsz_idx, | 1429 | bfr.pgsz_idx, |
1422 | bfr.kind_v, | 1430 | bfr.kind_v, |
1423 | bfr.ctag_offset, | 1431 | bfr.ctag_offset, |
1424 | flags, rw_flag); | 1432 | flags, rw_flag, |
1425 | 1433 | clear_ctags); | |
1426 | if (!map_offset) | 1434 | if (!map_offset) |
1427 | goto clean_up; | 1435 | goto clean_up; |
1428 | 1436 | ||
@@ -1531,17 +1539,18 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
1531 | u32 flags, | 1539 | u32 flags, |
1532 | int rw_flag) | 1540 | int rw_flag) |
1533 | { | 1541 | { |
1542 | struct gk20a *g = gk20a_from_vm(vm); | ||
1534 | u64 vaddr; | 1543 | u64 vaddr; |
1535 | 1544 | ||
1536 | mutex_lock(&vm->update_gmmu_lock); | 1545 | mutex_lock(&vm->update_gmmu_lock); |
1537 | vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */ | 1546 | vaddr = g->ops.mm.gmmu_map(vm, 0, /* already mapped? - No */ |
1538 | *sgt, /* sg table */ | 1547 | *sgt, /* sg table */ |
1539 | 0, /* sg offset */ | 1548 | 0, /* sg offset */ |
1540 | size, | 1549 | size, |
1541 | 0, /* page size index = 0 i.e. SZ_4K */ | 1550 | 0, /* page size index = 0 i.e. SZ_4K */ |
1542 | 0, /* kind */ | 1551 | 0, /* kind */ |
1543 | 0, /* ctag_offset */ | 1552 | 0, /* ctag_offset */ |
1544 | flags, rw_flag); | 1553 | flags, rw_flag, false); |
1545 | mutex_unlock(&vm->update_gmmu_lock); | 1554 | mutex_unlock(&vm->update_gmmu_lock); |
1546 | if (!vaddr) { | 1555 | if (!vaddr) { |
1547 | gk20a_err(dev_from_vm(vm), "failed to allocate va space"); | 1556 | gk20a_err(dev_from_vm(vm), "failed to allocate va space"); |
@@ -1549,7 +1558,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
1549 | } | 1558 | } |
1550 | 1559 | ||
1551 | /* Invalidate kernel mappings immediately */ | 1560 | /* Invalidate kernel mappings immediately */ |
1552 | gk20a_mm_tlb_invalidate(vm); | 1561 | g->ops.mm.tlb_invalidate(vm); |
1553 | 1562 | ||
1554 | return vaddr; | 1563 | return vaddr; |
1555 | } | 1564 | } |
@@ -1573,8 +1582,10 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, | |||
1573 | u64 size, | 1582 | u64 size, |
1574 | int rw_flag) | 1583 | int rw_flag) |
1575 | { | 1584 | { |
1585 | struct gk20a *g = gk20a_from_vm(vm); | ||
1586 | |||
1576 | mutex_lock(&vm->update_gmmu_lock); | 1587 | mutex_lock(&vm->update_gmmu_lock); |
1577 | __locked_gmmu_unmap(vm, | 1588 | g->ops.mm.gmmu_unmap(vm, |
1578 | vaddr, | 1589 | vaddr, |
1579 | size, | 1590 | size, |
1580 | 0, /* page size 4K */ | 1591 | 0, /* page size 4K */ |
@@ -1970,10 +1981,10 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr, | |||
1970 | } | 1981 | } |
1971 | 1982 | ||
1972 | for (i = 0; i < num_pages; i++) { | 1983 | for (i = 0; i < num_pages; i++) { |
1973 | u64 page_vaddr = __locked_gmmu_map(vm, vaddr, | 1984 | u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr, |
1974 | vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0, | 1985 | vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0, |
1975 | NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET, | 1986 | NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET, |
1976 | gk20a_mem_flag_none); | 1987 | gk20a_mem_flag_none, false); |
1977 | 1988 | ||
1978 | if (!page_vaddr) { | 1989 | if (!page_vaddr) { |
1979 | gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!"); | 1990 | gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!"); |
@@ -1990,7 +2001,7 @@ err_unmap: | |||
1990 | /* something went wrong. unmap pages */ | 2001 | /* something went wrong. unmap pages */ |
1991 | while (i--) { | 2002 | while (i--) { |
1992 | vaddr -= pgsz; | 2003 | vaddr -= pgsz; |
1993 | __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0, | 2004 | g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0, |
1994 | gk20a_mem_flag_none); | 2005 | gk20a_mem_flag_none); |
1995 | } | 2006 | } |
1996 | 2007 | ||
@@ -2005,12 +2016,14 @@ static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | |||
2005 | 2016 | ||
2006 | void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr, | 2017 | void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr, |
2007 | u64 size, u32 pgsz_idx) { | 2018 | u64 size, u32 pgsz_idx) { |
2008 | __locked_gmmu_unmap(vm, vaddr, size, pgsz_idx, | 2019 | struct gk20a *g = vm->mm->g; |
2009 | false, gk20a_mem_flag_none); | 2020 | |
2021 | g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx, | ||
2022 | false, gk20a_mem_flag_none); | ||
2010 | } | 2023 | } |
2011 | 2024 | ||
2012 | /* NOTE! mapped_buffers lock must be held */ | 2025 | /* NOTE! mapped_buffers lock must be held */ |
2013 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | 2026 | void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) |
2014 | { | 2027 | { |
2015 | struct vm_gk20a *vm = mapped_buffer->vm; | 2028 | struct vm_gk20a *vm = mapped_buffer->vm; |
2016 | struct gk20a *g = vm->mm->g; | 2029 | struct gk20a *g = vm->mm->g; |
@@ -2026,7 +2039,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | |||
2026 | if (g->ops.mm.put_empty) { | 2039 | if (g->ops.mm.put_empty) { |
2027 | g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx); | 2040 | g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx); |
2028 | } else { | 2041 | } else { |
2029 | __locked_gmmu_unmap(vm, | 2042 | g->ops.mm.gmmu_unmap(vm, |
2030 | mapped_buffer->addr, | 2043 | mapped_buffer->addr, |
2031 | mapped_buffer->size, | 2044 | mapped_buffer->size, |
2032 | mapped_buffer->pgsz_idx, | 2045 | mapped_buffer->pgsz_idx, |
@@ -2036,7 +2049,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | |||
2036 | num_pages, pgsz_idx, false); | 2049 | num_pages, pgsz_idx, false); |
2037 | } | 2050 | } |
2038 | } else | 2051 | } else |
2039 | __locked_gmmu_unmap(vm, | 2052 | g->ops.mm.gmmu_unmap(vm, |
2040 | mapped_buffer->addr, | 2053 | mapped_buffer->addr, |
2041 | mapped_buffer->size, | 2054 | mapped_buffer->size, |
2042 | mapped_buffer->pgsz_idx, | 2055 | mapped_buffer->pgsz_idx, |
@@ -2085,7 +2098,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) | |||
2085 | mutex_unlock(&vm->update_gmmu_lock); | 2098 | mutex_unlock(&vm->update_gmmu_lock); |
2086 | } | 2099 | } |
2087 | 2100 | ||
2088 | static void gk20a_vm_remove_support(struct vm_gk20a *vm) | 2101 | void gk20a_vm_remove_support(struct vm_gk20a *vm) |
2089 | { | 2102 | { |
2090 | struct gk20a *g = vm->mm->g; | 2103 | struct gk20a *g = vm->mm->g; |
2091 | struct mapped_buffer_node *mapped_buffer; | 2104 | struct mapped_buffer_node *mapped_buffer; |
@@ -2156,7 +2169,8 @@ static void gk20a_vm_remove_support(struct vm_gk20a *vm) | |||
2156 | static void gk20a_vm_remove_support_kref(struct kref *ref) | 2169 | static void gk20a_vm_remove_support_kref(struct kref *ref) |
2157 | { | 2170 | { |
2158 | struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); | 2171 | struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); |
2159 | gk20a_vm_remove_support(vm); | 2172 | struct gk20a *g = gk20a_from_vm(vm); |
2173 | g->ops.mm.vm_remove(vm); | ||
2160 | } | 2174 | } |
2161 | 2175 | ||
2162 | void gk20a_vm_get(struct vm_gk20a *vm) | 2176 | void gk20a_vm_get(struct vm_gk20a *vm) |
@@ -3124,5 +3138,14 @@ void gk20a_init_mm(struct gpu_ops *gops) | |||
3124 | gops->mm.put_empty = gk20a_vm_put_empty; | 3138 | gops->mm.put_empty = gk20a_vm_put_empty; |
3125 | gops->mm.clear_sparse = gk20a_vm_clear_sparse; | 3139 | gops->mm.clear_sparse = gk20a_vm_clear_sparse; |
3126 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; | 3140 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; |
3141 | gops->mm.gmmu_map = gk20a_locked_gmmu_map; | ||
3142 | gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; | ||
3143 | gops->mm.vm_remove = gk20a_vm_remove_support; | ||
3144 | gops->mm.vm_alloc_share = gk20a_vm_alloc_share; | ||
3145 | gops->mm.vm_bind_channel = gk20a_vm_bind_channel; | ||
3146 | gops->mm.fb_flush = gk20a_mm_fb_flush; | ||
3147 | gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; | ||
3148 | gops->mm.l2_flush = gk20a_mm_l2_flush; | ||
3149 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; | ||
3127 | } | 3150 | } |
3128 | 3151 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b8726c62..f06c465a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -318,6 +318,10 @@ struct vm_gk20a { | |||
318 | dma_addr_t zero_page_iova; | 318 | dma_addr_t zero_page_iova; |
319 | void *zero_page_cpuva; | 319 | void *zero_page_cpuva; |
320 | struct sg_table *zero_page_sgt; | 320 | struct sg_table *zero_page_sgt; |
321 | |||
322 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
323 | u64 handle; | ||
324 | #endif | ||
321 | }; | 325 | }; |
322 | 326 | ||
323 | struct gk20a; | 327 | struct gk20a; |
@@ -438,11 +442,30 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
438 | u32 flags, | 442 | u32 flags, |
439 | int rw_flag); | 443 | int rw_flag); |
440 | 444 | ||
445 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | ||
446 | u64 map_offset, | ||
447 | struct sg_table *sgt, | ||
448 | u64 buffer_offset, | ||
449 | u64 size, | ||
450 | int pgsz_idx, | ||
451 | u8 kind_v, | ||
452 | u32 ctag_offset, | ||
453 | u32 flags, | ||
454 | int rw_flag, | ||
455 | bool clear_ctags); | ||
456 | |||
441 | void gk20a_gmmu_unmap(struct vm_gk20a *vm, | 457 | void gk20a_gmmu_unmap(struct vm_gk20a *vm, |
442 | u64 vaddr, | 458 | u64 vaddr, |
443 | u64 size, | 459 | u64 size, |
444 | int rw_flag); | 460 | int rw_flag); |
445 | 461 | ||
462 | void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | ||
463 | u64 vaddr, | ||
464 | u64 size, | ||
465 | int pgsz_idx, | ||
466 | bool va_allocated, | ||
467 | int rw_flag); | ||
468 | |||
446 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); | 469 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); |
447 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | 470 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, |
448 | struct sg_table *sgt); | 471 | struct sg_table *sgt); |
@@ -461,6 +484,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
461 | /* unmap handle from kernel */ | 484 | /* unmap handle from kernel */ |
462 | void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); | 485 | void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); |
463 | 486 | ||
487 | void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); | ||
488 | |||
464 | /* get reference to all currently mapped buffers */ | 489 | /* get reference to all currently mapped buffers */ |
465 | int gk20a_vm_get_buffers(struct vm_gk20a *vm, | 490 | int gk20a_vm_get_buffers(struct vm_gk20a *vm, |
466 | struct mapped_buffer_node ***mapped_buffers, | 491 | struct mapped_buffer_node ***mapped_buffers, |
@@ -482,6 +507,16 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | |||
482 | void gk20a_vm_get(struct vm_gk20a *vm); | 507 | void gk20a_vm_get(struct vm_gk20a *vm); |
483 | void gk20a_vm_put(struct vm_gk20a *vm); | 508 | void gk20a_vm_put(struct vm_gk20a *vm); |
484 | 509 | ||
510 | void gk20a_vm_remove_support(struct vm_gk20a *vm); | ||
511 | |||
512 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | ||
513 | u64 size, | ||
514 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx); | ||
515 | |||
516 | int gk20a_vm_free_va(struct vm_gk20a *vm, | ||
517 | u64 offset, u64 size, | ||
518 | enum gmmu_pgsz_gk20a pgsz_idx); | ||
519 | |||
485 | /* vm-as interface */ | 520 | /* vm-as interface */ |
486 | struct nvhost_as_alloc_space_args; | 521 | struct nvhost_as_alloc_space_args; |
487 | struct nvhost_as_free_space_args; | 522 | struct nvhost_as_free_space_args; |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index 6dd0c0db..e6ed9898 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h | |||
@@ -151,6 +151,12 @@ struct gk20a_platform { | |||
151 | * of the CPU. | 151 | * of the CPU. |
152 | */ | 152 | */ |
153 | void (*dump_platform_dependencies)(struct platform_device *dev); | 153 | void (*dump_platform_dependencies)(struct platform_device *dev); |
154 | |||
155 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
156 | bool virtual_dev; | ||
157 | u64 virt_handle; | ||
158 | struct task_struct *intr_handler; | ||
159 | #endif | ||
154 | }; | 160 | }; |
155 | 161 | ||
156 | static inline struct gk20a_platform *gk20a_get_platform( | 162 | static inline struct gk20a_platform *gk20a_get_platform( |
@@ -163,6 +169,9 @@ extern struct gk20a_platform gk20a_generic_platform; | |||
163 | #ifdef CONFIG_TEGRA_GK20A | 169 | #ifdef CONFIG_TEGRA_GK20A |
164 | extern struct gk20a_platform gk20a_tegra_platform; | 170 | extern struct gk20a_platform gk20a_tegra_platform; |
165 | extern struct gk20a_platform gm20b_tegra_platform; | 171 | extern struct gk20a_platform gm20b_tegra_platform; |
172 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
173 | extern struct gk20a_platform vgpu_tegra_platform; | ||
174 | #endif | ||
166 | #endif | 175 | #endif |
167 | 176 | ||
168 | static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev) | 177 | static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev) |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c new file mode 100644 index 00000000..ea4fde79 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Tegra Virtualized GPU Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/of_platform.h> | ||
17 | |||
18 | #include "gk20a.h" | ||
19 | #include "hal_gk20a.h" | ||
20 | #include "platform_gk20a.h" | ||
21 | |||
22 | static int gk20a_tegra_probe(struct platform_device *dev) | ||
23 | { | ||
24 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
25 | struct device_node *np = dev->dev.of_node; | ||
26 | const __be32 *host1x_ptr; | ||
27 | struct platform_device *host1x_pdev = NULL; | ||
28 | |||
29 | host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); | ||
30 | if (host1x_ptr) { | ||
31 | struct device_node *host1x_node = | ||
32 | of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); | ||
33 | |||
34 | host1x_pdev = of_find_device_by_node(host1x_node); | ||
35 | if (!host1x_pdev) { | ||
36 | dev_warn(&dev->dev, "host1x device not available"); | ||
37 | return -EPROBE_DEFER; | ||
38 | } | ||
39 | |||
40 | } else { | ||
41 | host1x_pdev = to_platform_device(dev->dev.parent); | ||
42 | dev_warn(&dev->dev, "host1x reference not found. assuming host1x to be parent"); | ||
43 | } | ||
44 | |||
45 | platform->g->host1x_dev = host1x_pdev; | ||
46 | |||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | struct gk20a_platform vgpu_tegra_platform = { | ||
51 | .has_syncpoints = true, | ||
52 | |||
53 | /* power management configuration */ | ||
54 | .can_railgate = false, | ||
55 | .enable_slcg = false, | ||
56 | .enable_blcg = false, | ||
57 | .enable_elcg = false, | ||
58 | .enable_elpg = false, | ||
59 | .enable_aelpg = false, | ||
60 | |||
61 | .probe = gk20a_tegra_probe, | ||
62 | |||
63 | .virtual_dev = true, | ||
64 | }; | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 7e580136..86d049cf 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -102,5 +102,14 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g, | |||
102 | void gm20b_init_fifo(struct gpu_ops *gops) | 102 | void gm20b_init_fifo(struct gpu_ops *gops) |
103 | { | 103 | { |
104 | gops->fifo.bind_channel = channel_gm20b_bind; | 104 | gops->fifo.bind_channel = channel_gm20b_bind; |
105 | gops->fifo.unbind_channel = channel_gk20a_unbind; | ||
106 | gops->fifo.disable_channel = channel_gk20a_disable; | ||
107 | gops->fifo.alloc_inst = channel_gk20a_alloc_inst; | ||
108 | gops->fifo.free_inst = channel_gk20a_free_inst; | ||
109 | gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; | ||
110 | |||
111 | gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; | ||
112 | gops->fifo.update_runlist = gk20a_fifo_update_runlist; | ||
105 | gops->fifo.trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault; | 113 | gops->fifo.trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault; |
114 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; | ||
106 | } | 115 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 72500b0e..c9c32b9f 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -751,4 +751,9 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
751 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | 751 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; |
752 | #endif | 752 | #endif |
753 | gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask; | 753 | gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask; |
754 | gops->gr.free_channel_ctx = gk20a_free_channel_ctx; | ||
755 | gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; | ||
756 | gops->gr.free_obj_ctx = gk20a_free_obj_ctx; | ||
757 | gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; | ||
758 | gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; | ||
754 | } | 759 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index ac82d56a..ed5b5e0d 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -327,4 +327,13 @@ void gm20b_init_mm(struct gpu_ops *gops) | |||
327 | gops->mm.set_sparse = gm20b_vm_put_sparse; | 327 | gops->mm.set_sparse = gm20b_vm_put_sparse; |
328 | gops->mm.clear_sparse = gm20b_vm_clear_sparse; | 328 | gops->mm.clear_sparse = gm20b_vm_clear_sparse; |
329 | gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; | 329 | gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; |
330 | gops->mm.gmmu_map = gk20a_locked_gmmu_map; | ||
331 | gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; | ||
332 | gops->mm.vm_remove = gk20a_vm_remove_support; | ||
333 | gops->mm.vm_alloc_share = gk20a_vm_alloc_share; | ||
334 | gops->mm.vm_bind_channel = gk20a_vm_bind_channel; | ||
335 | gops->mm.fb_flush = gk20a_mm_fb_flush; | ||
336 | gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; | ||
337 | gops->mm.l2_flush = gk20a_mm_l2_flush; | ||
338 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; | ||
330 | } | 339 | } |
diff --git a/drivers/gpu/nvgpu/vgpu/Makefile b/drivers/gpu/nvgpu/vgpu/Makefile new file mode 100644 index 00000000..edad7171 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/Makefile | |||
@@ -0,0 +1,10 @@ | |||
1 | GCOV_PROFILE := y | ||
2 | ccflags-y += -Idrivers/gpu/nvgpu | ||
3 | ccflags-y += -Wno-multichar | ||
4 | |||
5 | obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) = \ | ||
6 | ltc_vgpu.o \ | ||
7 | gr_vgpu.o \ | ||
8 | fifo_vgpu.o \ | ||
9 | mm_vgpu.o \ | ||
10 | vgpu.o | ||
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c new file mode 100644 index 00000000..23dec1f3 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -0,0 +1,569 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Fifo | ||
3 | * | ||
4 | * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/dma-mapping.h> | ||
17 | #include "vgpu/vgpu.h" | ||
18 | #include "gk20a/hw_fifo_gk20a.h" | ||
19 | #include "gk20a/hw_ram_gk20a.h" | ||
20 | |||
21 | static void vgpu_channel_bind(struct channel_gk20a *ch) | ||
22 | { | ||
23 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); | ||
24 | struct tegra_vgpu_cmd_msg msg; | ||
25 | struct tegra_vgpu_channel_config_params *p = | ||
26 | &msg.params.channel_config; | ||
27 | int err; | ||
28 | |||
29 | gk20a_dbg_info("bind channel %d", ch->hw_chid); | ||
30 | |||
31 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND; | ||
32 | msg.handle = platform->virt_handle; | ||
33 | p->handle = ch->virt_ctx; | ||
34 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
35 | WARN_ON(err || msg.ret); | ||
36 | |||
37 | ch->bound = true; | ||
38 | } | ||
39 | |||
40 | static void vgpu_channel_unbind(struct channel_gk20a *ch) | ||
41 | { | ||
42 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); | ||
43 | |||
44 | gk20a_dbg_fn(""); | ||
45 | |||
46 | if (ch->bound) { | ||
47 | struct tegra_vgpu_cmd_msg msg; | ||
48 | struct tegra_vgpu_channel_config_params *p = | ||
49 | &msg.params.channel_config; | ||
50 | int err; | ||
51 | |||
52 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND; | ||
53 | msg.handle = platform->virt_handle; | ||
54 | p->handle = ch->virt_ctx; | ||
55 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
56 | WARN_ON(err || msg.ret); | ||
57 | } | ||
58 | |||
59 | ch->bound = false; | ||
60 | |||
61 | /* | ||
62 | * if we are agrressive then we can destroy the syncpt | ||
63 | * resource at this point | ||
64 | * if not, then it will be destroyed at channel_free() | ||
65 | */ | ||
66 | if (ch->sync && ch->sync->aggressive_destroy) { | ||
67 | ch->sync->destroy(ch->sync); | ||
68 | ch->sync = NULL; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) | ||
73 | { | ||
74 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
75 | struct tegra_vgpu_cmd_msg msg; | ||
76 | struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; | ||
77 | int err; | ||
78 | |||
79 | gk20a_dbg_fn(""); | ||
80 | |||
81 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX; | ||
82 | msg.handle = platform->virt_handle; | ||
83 | p->id = ch->hw_chid; | ||
84 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
85 | if (err || msg.ret) { | ||
86 | gk20a_err(dev_from_gk20a(g), "fail"); | ||
87 | return -ENOMEM; | ||
88 | } | ||
89 | |||
90 | ch->virt_ctx = p->handle; | ||
91 | gk20a_dbg_fn("done"); | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch) | ||
96 | { | ||
97 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
98 | struct tegra_vgpu_cmd_msg msg; | ||
99 | struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; | ||
100 | int err; | ||
101 | |||
102 | gk20a_dbg_fn(""); | ||
103 | |||
104 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX; | ||
105 | msg.handle = platform->virt_handle; | ||
106 | p->handle = ch->virt_ctx; | ||
107 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
108 | WARN_ON(err || msg.ret); | ||
109 | } | ||
110 | |||
111 | static void vgpu_channel_disable(struct channel_gk20a *ch) | ||
112 | { | ||
113 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); | ||
114 | struct tegra_vgpu_cmd_msg msg; | ||
115 | struct tegra_vgpu_channel_config_params *p = | ||
116 | &msg.params.channel_config; | ||
117 | int err; | ||
118 | |||
119 | gk20a_dbg_fn(""); | ||
120 | |||
121 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE; | ||
122 | msg.handle = platform->virt_handle; | ||
123 | p->handle = ch->virt_ctx; | ||
124 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
125 | WARN_ON(err || msg.ret); | ||
126 | } | ||
127 | |||
/*
 * Set up the channel's RAMFC state on the RM server: gpfifo GPU VA,
 * number of gpfifo entries and the userd address.  p->iova flags
 * whether this device sits behind an IOMMU mapping, so the server
 * presumably interprets userd_addr as an IOVA vs. a physical address
 * accordingly — TODO(review): confirm against the server protocol.
 * Returns 0 on success, -ENOMEM on any RPC failure.
 */
static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
				u32 gpfifo_entries)
{
	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
	struct device __maybe_unused *d = dev_from_gk20a(ch->g);
	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
	msg.handle = platform->virt_handle;
	p->handle = ch->virt_ctx;
	p->gpfifo_va = gpfifo_base;
	p->num_entries = gpfifo_entries;
	p->userd_addr = ch->userd_iova;
	p->iova = mapping ? 1 : 0;	/* 1 = device has an IOMMU mapping */
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	return (err || msg.ret) ? -ENOMEM : 0;
}
151 | |||
/*
 * Populate the fifo engine table.  Only the graphics engine is
 * described for the virtual GPU; its runlist id is hard-coded to 0
 * until it can be queried from the RM server (see FIXME below).
 * Always returns 0.
 */
static int init_engine_info(struct fifo_gk20a *f)
{
	struct fifo_engine_info_gk20a *gr_info;
	const u32 gr_sw_id = ENGINE_GR_GK20A;

	gk20a_dbg_fn("");

	/* all we really care about finding is the graphics entry */
	/* especially early on in sim it probably thinks it has more */
	f->num_engines = 1;

	gr_info = f->engine_info + gr_sw_id;

	gr_info->sw_id = gr_sw_id;
	gr_info->name = "gr";
	/* FIXME: retrieve this from server */
	gr_info->runlist_id = 0;
	return 0;
}
171 | |||
/*
 * Allocate the runlist bookkeeping for the GR engine's runlist: the
 * per-runlist info array, the active-channel bitmap, and
 * MAX_RUNLIST_BUFFERS DMA-coherent buffers of one u16 entry per
 * channel.  On failure everything allocated so far is unwound via the
 * goto-cleanup chain below.  Returns 0 on success, -ENOMEM otherwise.
 */
static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
{
	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	struct device *d = dev_from_gk20a(g);
	u32 runlist_id;
	u32 i;
	u64 runlist_size;

	gk20a_dbg_fn("");

	f->max_runlists = fifo_eng_runlist_base__size_1_v();
	f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
				f->max_runlists, GFP_KERNEL);
	if (!f->runlist_info)
		goto clean_up;

	/* only the GR engine's runlist is set up (see init_engine_info) */
	engine_info = f->engine_info + ENGINE_GR_GK20A;
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	/* bitmap of active channels, sized in bytes */
	runlist->active_channels =
		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
			GFP_KERNEL);
	if (!runlist->active_channels)
		goto clean_up_runlist_info;

	/* one u16 runlist entry per channel */
	runlist_size = sizeof(u16) * f->num_channels;
	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
		dma_addr_t iova;

		runlist->mem[i].cpuva =
			dma_alloc_coherent(d,
					runlist_size,
					&iova,
					GFP_KERNEL);
		if (!runlist->mem[i].cpuva) {
			dev_err(d, "memory allocation failed\n");
			goto clean_up_runlist;
		}
		runlist->mem[i].iova = iova;
		runlist->mem[i].size = runlist_size;
	}
	mutex_init(&runlist->mutex);
	init_waitqueue_head(&runlist->runlist_wq);

	/* None of buffers is pinned if this value doesn't change.
	Otherwise, one of them (cur_buffer) must have been pinned. */
	runlist->cur_buffer = MAX_RUNLIST_BUFFERS;

	gk20a_dbg_fn("done");
	return 0;

clean_up_runlist:
	/* free whichever buffers were successfully allocated */
	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
		if (runlist->mem[i].cpuva)
			dma_free_coherent(d,
				runlist->mem[i].size,
				runlist->mem[i].cpuva,
				runlist->mem[i].iova);
		runlist->mem[i].cpuva = NULL;
		runlist->mem[i].iova = 0;
	}

	kfree(runlist->active_channels);
	runlist->active_channels = NULL;

clean_up_runlist_info:
	kfree(f->runlist_info);
	f->runlist_info = NULL;

clean_up:
	gk20a_dbg_fn("fail");
	return -ENOMEM;
}
247 | |||
248 | static int vgpu_init_fifo_setup_sw(struct gk20a *g) | ||
249 | { | ||
250 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
251 | struct fifo_gk20a *f = &g->fifo; | ||
252 | struct device *d = dev_from_gk20a(g); | ||
253 | int chid, err = 0; | ||
254 | dma_addr_t iova; | ||
255 | |||
256 | gk20a_dbg_fn(""); | ||
257 | |||
258 | if (f->sw_ready) { | ||
259 | gk20a_dbg_fn("skip init"); | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | f->g = g; | ||
264 | |||
265 | err = vgpu_get_attribute(platform->virt_handle, | ||
266 | TEGRA_VGPU_ATTRIB_NUM_CHANNELS, | ||
267 | &f->num_channels); | ||
268 | if (err) | ||
269 | return -ENXIO; | ||
270 | |||
271 | f->max_engines = ENGINE_INVAL_GK20A; | ||
272 | |||
273 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); | ||
274 | f->userd_total_size = f->userd_entry_size * f->num_channels; | ||
275 | |||
276 | f->userd.cpuva = dma_alloc_coherent(d, | ||
277 | f->userd_total_size, | ||
278 | &iova, | ||
279 | GFP_KERNEL); | ||
280 | if (!f->userd.cpuva) { | ||
281 | dev_err(d, "memory allocation failed\n"); | ||
282 | goto clean_up; | ||
283 | } | ||
284 | |||
285 | f->userd.iova = iova; | ||
286 | err = gk20a_get_sgtable(d, &f->userd.sgt, | ||
287 | f->userd.cpuva, f->userd.iova, | ||
288 | f->userd_total_size); | ||
289 | if (err) { | ||
290 | dev_err(d, "failed to create sg table\n"); | ||
291 | goto clean_up; | ||
292 | } | ||
293 | |||
294 | /* bar1 va */ | ||
295 | f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size); | ||
296 | if (!f->userd.gpu_va) { | ||
297 | dev_err(d, "gmmu mapping failed\n"); | ||
298 | goto clean_up; | ||
299 | } | ||
300 | |||
301 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); | ||
302 | |||
303 | f->userd.size = f->userd_total_size; | ||
304 | |||
305 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), | ||
306 | GFP_KERNEL); | ||
307 | f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), | ||
308 | GFP_KERNEL); | ||
309 | |||
310 | if (!(f->channel && f->engine_info)) { | ||
311 | err = -ENOMEM; | ||
312 | goto clean_up; | ||
313 | } | ||
314 | |||
315 | init_engine_info(f); | ||
316 | |||
317 | init_runlist(g, f); | ||
318 | |||
319 | for (chid = 0; chid < f->num_channels; chid++) { | ||
320 | f->channel[chid].userd_cpu_va = | ||
321 | f->userd.cpuva + chid * f->userd_entry_size; | ||
322 | f->channel[chid].userd_iova = | ||
323 | NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) | ||
324 | + chid * f->userd_entry_size; | ||
325 | f->channel[chid].userd_gpu_va = | ||
326 | f->userd.gpu_va + chid * f->userd_entry_size; | ||
327 | |||
328 | gk20a_init_channel_support(g, chid); | ||
329 | } | ||
330 | mutex_init(&f->ch_inuse_mutex); | ||
331 | |||
332 | f->deferred_reset_pending = false; | ||
333 | mutex_init(&f->deferred_reset_mutex); | ||
334 | |||
335 | f->sw_ready = true; | ||
336 | |||
337 | gk20a_dbg_fn("done"); | ||
338 | return 0; | ||
339 | |||
340 | clean_up: | ||
341 | gk20a_dbg_fn("fail"); | ||
342 | /* FIXME: unmap from bar1 */ | ||
343 | if (f->userd.sgt) | ||
344 | gk20a_free_sgtable(&f->userd.sgt); | ||
345 | if (f->userd.cpuva) | ||
346 | dma_free_coherent(d, | ||
347 | f->userd_total_size, | ||
348 | f->userd.cpuva, | ||
349 | f->userd.iova); | ||
350 | f->userd.cpuva = NULL; | ||
351 | f->userd.iova = 0; | ||
352 | |||
353 | memset(&f->userd, 0, sizeof(struct userd_desc)); | ||
354 | |||
355 | kfree(f->channel); | ||
356 | f->channel = NULL; | ||
357 | kfree(f->engine_info); | ||
358 | f->engine_info = NULL; | ||
359 | |||
360 | return err; | ||
361 | } | ||
362 | |||
/*
 * Hardware-side fifo init: sanity-check that the userd region is
 * readable and writable both through its BAR1 mapping and through the
 * CPU mapping, i.e. that the two views are coherent.  The original
 * userd word is restored afterwards.  Returns 0 on success, -EINVAL
 * if a BAR1 write is not observed back through BAR1.
 */
static int vgpu_init_fifo_setup_hw(struct gk20a *g)
{
	gk20a_dbg_fn("");

	/* test write, read through bar1 @ userd region before
	 * turning on the snooping */
	{
		struct fifo_gk20a *f = &g->fifo;
		u32 v, v1 = 0x33, v2 = 0x55;

		/* NOTE(review): gpu_va is u64 but is stored into a u32
		 * here, truncating the upper half — confirm BAR1 VAs
		 * always fit in 32 bits for this chip. */
		u32 bar1_vaddr = f->userd.gpu_va;
		volatile u32 *cpu_vaddr = f->userd.cpuva;

		gk20a_dbg_info("test bar1 @ vaddr 0x%x",
			bar1_vaddr);

		v = gk20a_bar1_readl(g, bar1_vaddr);

		/* CPU write must become visible through BAR1 */
		*cpu_vaddr = v1;
		smp_mb();

		if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
			gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
			return -EINVAL;
		}

		/* BAR1 write must read back through BAR1 */
		gk20a_bar1_writel(g, bar1_vaddr, v2);

		if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
			gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
			return -EINVAL;
		}

		/* is it visible to the cpu? */
		if (*cpu_vaddr != v2) {
			gk20a_err(dev_from_gk20a(g),
				"cpu didn't see bar1 write @ %p!",
				cpu_vaddr);
		}

		/* put it back */
		gk20a_bar1_writel(g, bar1_vaddr, v);
	}

	gk20a_dbg_fn("done");

	return 0;
}
411 | |||
/*
 * Full fifo init for the virtual GPU: software setup followed by the
 * BAR1/userd hardware sanity check.  Returns 0 on success, negative
 * errno on failure.
 *
 * Fix: err was declared u32, which cannot represent the negative errno
 * codes returned by the setup functions (the "if (err)" test still
 * fired, but a huge positive value was propagated to the caller).
 */
int vgpu_init_fifo_support(struct gk20a *g)
{
	int err;

	gk20a_dbg_fn("");

	err = vgpu_init_fifo_setup_sw(g);
	if (err)
		return err;

	err = vgpu_init_fifo_setup_hw(g);
	return err;
}
425 | |||
426 | static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid) | ||
427 | { | ||
428 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
429 | struct fifo_gk20a *f = &g->fifo; | ||
430 | struct tegra_vgpu_cmd_msg msg; | ||
431 | struct tegra_vgpu_channel_config_params *p = | ||
432 | &msg.params.channel_config; | ||
433 | int err; | ||
434 | |||
435 | gk20a_dbg_fn(""); | ||
436 | |||
437 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT; | ||
438 | msg.handle = platform->virt_handle; | ||
439 | p->handle = f->channel[hw_chid].virt_ctx; | ||
440 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
441 | |||
442 | if (err || msg.ret) { | ||
443 | gk20a_err(dev_from_gk20a(g), | ||
444 | "preempt channel %d failed\n", hw_chid); | ||
445 | err = -ENOMEM; | ||
446 | } | ||
447 | |||
448 | return err; | ||
449 | } | ||
450 | |||
451 | static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist, | ||
452 | u32 num_entries) | ||
453 | { | ||
454 | struct tegra_vgpu_cmd_msg *msg; | ||
455 | struct tegra_vgpu_runlist_params *p; | ||
456 | size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries; | ||
457 | char *ptr; | ||
458 | int err; | ||
459 | |||
460 | msg = kmalloc(size, GFP_KERNEL); | ||
461 | if (!msg) | ||
462 | return -1; | ||
463 | |||
464 | msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST; | ||
465 | msg->handle = handle; | ||
466 | p = &msg->params.runlist; | ||
467 | p->runlist_id = runlist_id; | ||
468 | p->num_entries = num_entries; | ||
469 | |||
470 | ptr = (char *)msg + sizeof(*msg); | ||
471 | memcpy(ptr, runlist, sizeof(*runlist) * num_entries); | ||
472 | err = vgpu_comm_sendrecv(msg, size, sizeof(*msg)); | ||
473 | |||
474 | err = (err || msg->ret) ? -1 : 0; | ||
475 | kfree(msg); | ||
476 | return err; | ||
477 | } | ||
478 | |||
/*
 * Rebuild and submit the runlist with the runlist mutex held.
 * hw_chid != ~0 adds/removes that channel in the active bitmap first;
 * hw_chid == ~0 leaves the bitmap untouched (suspend/resume paths).
 * The runlist sent to the server is regenerated from the bitmap, or
 * emptied when suspending.  wait_for_finish is currently unused here.
 */
static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
					u32 hw_chid, bool add,
					bool wait_for_finish)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_runlist_info_gk20a *runlist;
	u16 *runlist_entry = NULL;
	u32 count = 0;

	gk20a_dbg_fn("");

	runlist = &f->runlist_info[runlist_id];

	/* valid channel, add/remove it from active list.
	Otherwise, keep active list untouched for suspend/resume. */
	if (hw_chid != ~0) {
		if (add) {
			/* already active: nothing to resubmit */
			if (test_and_set_bit(hw_chid,
				runlist->active_channels) == 1)
				return 0;
		} else {
			/* already inactive: nothing to resubmit */
			if (test_and_clear_bit(hw_chid,
				runlist->active_channels) == 0)
				return 0;
		}
	}

	if (hw_chid != ~0 || /* add/remove a valid channel */
	    add /* resume to add all channels back */) {
		u32 chid;

		/* pack every active channel id into buffer 0 */
		runlist_entry = runlist->mem[0].cpuva;
		for_each_set_bit(chid,
			runlist->active_channels, f->num_channels) {
			gk20a_dbg_info("add channel %d to runlist", chid);
			runlist_entry[0] = chid;
			runlist_entry++;
			count++;
		}
	} else /* suspend to remove all channels */
		count = 0;

	return vgpu_submit_runlist(platform->virt_handle, runlist_id,
				runlist->mem[0].cpuva, count);
}
525 | |||
526 | /* add/remove a channel from runlist | ||
527 | special cases below: runlist->active_channels will NOT be changed. | ||
528 | (hw_chid == ~0 && !add) means remove all active channels from runlist. | ||
529 | (hw_chid == ~0 && add) means restore all active channels on runlist. */ | ||
530 | static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id, | ||
531 | u32 hw_chid, bool add, bool wait_for_finish) | ||
532 | { | ||
533 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
534 | struct fifo_gk20a *f = &g->fifo; | ||
535 | u32 ret = 0; | ||
536 | |||
537 | gk20a_dbg_fn(""); | ||
538 | |||
539 | runlist = &f->runlist_info[runlist_id]; | ||
540 | |||
541 | mutex_lock(&runlist->mutex); | ||
542 | |||
543 | ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add, | ||
544 | wait_for_finish); | ||
545 | |||
546 | mutex_unlock(&runlist->mutex); | ||
547 | return ret; | ||
548 | } | ||
549 | |||
/*
 * No-op for the virtual GPU: engine idling is managed by the RM
 * server, so there is nothing to poll on the guest side.
 */
static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
{
	gk20a_dbg_fn("");

	return 0;
}
556 | |||
/* Install the vgpu implementations of the fifo HAL callbacks. */
void vgpu_init_fifo_ops(struct gpu_ops *gops)
{
	gops->fifo.bind_channel = vgpu_channel_bind;
	gops->fifo.unbind_channel = vgpu_channel_unbind;
	gops->fifo.disable_channel = vgpu_channel_disable;
	gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
	gops->fifo.free_inst = vgpu_channel_free_inst;
	gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
	gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
	gops->fifo.update_runlist = vgpu_fifo_update_runlist;
	gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
}
569 | |||
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c new file mode 100644 index 00000000..a7e966da --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c | |||
@@ -0,0 +1,687 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Graphics | ||
3 | * | ||
4 | * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include "vgpu/vgpu.h" | ||
17 | #include "gk20a/hw_gr_gk20a.h" | ||
18 | |||
19 | static int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va) | ||
20 | { | ||
21 | struct gk20a_platform *platform = gk20a_get_platform(c->g->dev); | ||
22 | struct tegra_vgpu_cmd_msg msg; | ||
23 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
24 | int err; | ||
25 | |||
26 | gk20a_dbg_fn(""); | ||
27 | |||
28 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX; | ||
29 | msg.handle = platform->virt_handle; | ||
30 | p->handle = c->virt_ctx; | ||
31 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
32 | |||
33 | return (err || msg.ret) ? -1 : 0; | ||
34 | } | ||
35 | |||
36 | static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g, | ||
37 | struct channel_gk20a *c, bool patch) | ||
38 | { | ||
39 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
40 | struct tegra_vgpu_cmd_msg msg; | ||
41 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
42 | int err; | ||
43 | |||
44 | gk20a_dbg_fn(""); | ||
45 | |||
46 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX; | ||
47 | msg.handle = platform->virt_handle; | ||
48 | p->handle = c->virt_ctx; | ||
49 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
50 | |||
51 | return (err || msg.ret) ? -1 : 0; | ||
52 | } | ||
53 | |||
/* load saved fresh copy of golden image into channel gr_ctx */
static int vgpu_gr_load_golden_ctx_image(struct gk20a *g,
					struct channel_gk20a *c)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX;
	msg.handle = platform->virt_handle;
	p->handle = c->virt_ctx;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	/* -1 (not errno) on failure, matching the other gr RPC helpers */
	return (err || msg.ret) ? -1 : 0;
}
72 | |||
73 | static int vgpu_gr_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr) | ||
74 | { | ||
75 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
76 | |||
77 | gk20a_dbg_fn(""); | ||
78 | |||
79 | vgpu_get_attribute(platform->virt_handle, | ||
80 | TEGRA_VGPU_ATTRIB_GOLDEN_CTX_SIZE, | ||
81 | &g->gr.ctx_vars.golden_image_size); | ||
82 | vgpu_get_attribute(platform->virt_handle, | ||
83 | TEGRA_VGPU_ATTRIB_ZCULL_CTX_SIZE, | ||
84 | &g->gr.ctx_vars.zcull_ctxsw_image_size); | ||
85 | if (!g->gr.ctx_vars.golden_image_size || | ||
86 | !g->gr.ctx_vars.zcull_ctxsw_image_size) | ||
87 | return -ENXIO; | ||
88 | |||
89 | gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size; | ||
90 | g->gr.ctx_vars.priv_access_map_size = 512 * 1024; | ||
91 | return 0; | ||
92 | } | ||
93 | |||
/*
 * Record the sizes of the global context buffers (circular buffer,
 * pagepool, attribute buffer, priv access map).  Note: only the sizes
 * are filled in here — no backing memory is allocated on the guest;
 * presumably the server owns the actual buffers and the guest only
 * reserves VAs in vgpu_gr_map_global_ctx_buffers().  Always returns 0.
 */
static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int attr_buffer_size;

	u32 cb_buffer_size = gr->bundle_cb_default_size *
		gr_scc_bundle_cb_size_div_256b_byte_granularity_v();

	u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
		gr_scc_pagepool_total_pages_byte_granularity_v();

	gk20a_dbg_fn("");

	attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);

	gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
	gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;

	gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
	gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;

	gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
	gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;

	gk20a_dbg_info("priv access map size : %d",
		gr->ctx_vars.priv_access_map_size);
	gr->global_ctx_buffer[PRIV_ACCESS_MAP].size =
		gr->ctx_vars.priv_access_map_size;

	return 0;
}
125 | |||
/*
 * Reserve GPU VAs in the channel's address space for each global ctx
 * buffer, then issue one RPC telling the server to map its buffers at
 * those addresses.  On any failure all VAs reserved so far are
 * released.  Returns 0 on success, -ENOMEM otherwise.
 */
static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
					struct channel_gk20a *c)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
	struct vm_gk20a *ch_vm = c->vm;
	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
	struct gr_gk20a *gr = &g->gr;
	u64 gpu_va;
	u32 i;
	int err;

	gk20a_dbg_fn("");

	/* FIXME: add VPR support */

	/* Circular Buffer */
	gpu_va = gk20a_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[CIRCULAR].size, 0);

	if (!gpu_va)
		goto clean_up;
	g_bfr_va[CIRCULAR_VA] = gpu_va;
	g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].size;

	/* Attribute Buffer */
	gpu_va = gk20a_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[ATTRIBUTE].size, 0);

	if (!gpu_va)
		goto clean_up;
	g_bfr_va[ATTRIBUTE_VA] = gpu_va;
	g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].size;

	/* Page Pool */
	gpu_va = gk20a_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[PAGEPOOL].size, 0);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[PAGEPOOL_VA] = gpu_va;
	g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].size;

	/* Priv register Access Map */
	gpu_va = gk20a_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[PRIV_ACCESS_MAP].size, 0);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
	g_bfr_size[PRIV_ACCESS_MAP_VA] =
		gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;

	/* single RPC carrying all four VAs */
	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX;
	msg.handle = platform->virt_handle;
	p->handle = c->virt_ctx;
	p->cb_va = g_bfr_va[CIRCULAR_VA];
	p->attr_va = g_bfr_va[ATTRIBUTE_VA];
	p->page_pool_va = g_bfr_va[PAGEPOOL_VA];
	p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA];
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (err || msg.ret)
		goto clean_up;

	c->ch_ctx.global_ctx_buffer_mapped = true;
	return 0;

clean_up:
	/* release every VA reserved before the failure */
	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
		if (g_bfr_va[i]) {
			gk20a_vm_free_va(ch_vm, g_bfr_va[i],
					g_bfr_size[i], 0);
			g_bfr_va[i] = 0;
		}
	}
	return -ENOMEM;
}
203 | |||
/*
 * Undo vgpu_gr_map_global_ctx_buffers(): tell the server to unmap its
 * global ctx buffers (if they were mapped) and release the guest-side
 * GPU VA reservations.
 */
static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
{
	struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
	struct vm_gk20a *ch_vm = c->vm;
	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
	u32 i;

	gk20a_dbg_fn("");

	if (c->ch_ctx.global_ctx_buffer_mapped) {
		struct tegra_vgpu_cmd_msg msg;
		struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
		int err;

		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX;
		msg.handle = platform->virt_handle;
		p->handle = c->virt_ctx;
		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
		WARN_ON(err || msg.ret);
	}

	/* VAs may have been reserved even if the map RPC never ran */
	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
		if (g_bfr_va[i]) {
			gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i], 0);
			g_bfr_va[i] = 0;
			g_bfr_size[i] = 0;
		}
	}
	c->ch_ctx.global_ctx_buffer_mapped = false;
}
235 | |||
236 | static int vgpu_gr_alloc_channel_gr_ctx(struct gk20a *g, | ||
237 | struct channel_gk20a *c) | ||
238 | { | ||
239 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
240 | struct tegra_vgpu_cmd_msg msg; | ||
241 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
242 | struct gr_gk20a *gr = &g->gr; | ||
243 | struct gr_ctx_desc *gr_ctx; | ||
244 | struct vm_gk20a *ch_vm = c->vm; | ||
245 | int err; | ||
246 | |||
247 | gk20a_dbg_fn(""); | ||
248 | |||
249 | if (gr->ctx_vars.buffer_size == 0) | ||
250 | return 0; | ||
251 | |||
252 | /* alloc channel gr ctx buffer */ | ||
253 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; | ||
254 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | ||
255 | |||
256 | gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL); | ||
257 | if (!gr_ctx) | ||
258 | return -ENOMEM; | ||
259 | |||
260 | gr_ctx->size = gr->ctx_vars.buffer_total_size; | ||
261 | gr_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, gr_ctx->size, 0); | ||
262 | |||
263 | if (!gr_ctx->gpu_va) { | ||
264 | kfree(gr_ctx); | ||
265 | return -ENOMEM; | ||
266 | } | ||
267 | |||
268 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_CTX; | ||
269 | msg.handle = platform->virt_handle; | ||
270 | p->handle = c->virt_ctx; | ||
271 | p->gr_ctx_va = gr_ctx->gpu_va; | ||
272 | p->class_num = c->obj_class; | ||
273 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
274 | |||
275 | if (err || msg.ret) { | ||
276 | gk20a_vm_free_va(ch_vm, gr_ctx->gpu_va, gr_ctx->size, 0); | ||
277 | err = -ENOMEM; | ||
278 | } else | ||
279 | c->ch_ctx.gr_ctx = gr_ctx; | ||
280 | |||
281 | return err; | ||
282 | } | ||
283 | |||
284 | static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c) | ||
285 | { | ||
286 | struct gk20a_platform *platform = gk20a_get_platform(c->g->dev); | ||
287 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
288 | struct vm_gk20a *ch_vm = c->vm; | ||
289 | |||
290 | gk20a_dbg_fn(""); | ||
291 | |||
292 | if (ch_ctx->gr_ctx && ch_ctx->gr_ctx->gpu_va) { | ||
293 | struct tegra_vgpu_cmd_msg msg; | ||
294 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
295 | int err; | ||
296 | |||
297 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX; | ||
298 | msg.handle = platform->virt_handle; | ||
299 | p->handle = c->virt_ctx; | ||
300 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
301 | WARN_ON(err || msg.ret); | ||
302 | |||
303 | gk20a_vm_free_va(ch_vm, ch_ctx->gr_ctx->gpu_va, | ||
304 | ch_ctx->gr_ctx->size, 0); | ||
305 | ch_ctx->gr_ctx->gpu_va = 0; | ||
306 | kfree(ch_ctx->gr_ctx); | ||
307 | } | ||
308 | } | ||
309 | |||
/*
 * Allocate the per-channel patch context buffer: reserve guest VA space,
 * then ask the vgpu server to back and bind a patch buffer at that address.
 * Returns 0 on success, -ENOMEM on VA exhaustion or server failure.
 */
static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
					struct channel_gk20a *c)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
	struct vm_gk20a *ch_vm = c->vm;
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
	int err;

	gk20a_dbg_fn("");

	/* fixed 128-word buffer; presumably mirrors the native driver's
	 * patch-buffer sizing — TODO confirm against gr_gk20a.c */
	patch_ctx->size = 128 * sizeof(u32);
	patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0);
	if (!patch_ctx->gpu_va)
		return -ENOMEM;

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX;
	msg.handle = platform->virt_handle;
	p->handle = c->virt_ctx;
	p->patch_ctx_va = patch_ctx->gpu_va;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (err || msg.ret) {
		/* server rejected the request: give the VA back */
		gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
		err = -ENOMEM;
	}

	return err;
}
339 | |||
/*
 * Free a channel's patch context buffer on the server side and release the
 * guest VA reservation. No-op if the buffer was never allocated.
 */
static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
{
	struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
	struct vm_gk20a *ch_vm = c->vm;

	gk20a_dbg_fn("");

	if (patch_ctx->gpu_va) {
		struct tegra_vgpu_cmd_msg msg;
		struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
		int err;

		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX;
		msg.handle = platform->virt_handle;
		p->handle = c->virt_ctx;
		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
		/* teardown path: nothing to unwind, just warn on failure */
		WARN_ON(err || msg.ret);

		gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
		patch_ctx->gpu_va = 0;
	}
}
363 | |||
/*
 * Release all per-channel graphics context state: global ctx buffer
 * mappings, patch ctx, and (for non-TSG channels) the gr ctx itself.
 * Installed as gops->gr.free_channel_ctx.
 */
static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
{
	gk20a_dbg_fn("");

	vgpu_gr_unmap_global_ctx_buffers(c);
	vgpu_gr_free_channel_patch_ctx(c);
	/* TSG-owned gr ctx is shared; only free it for bare channels */
	if (!gk20a_is_channel_marked_as_tsg(c))
		vgpu_gr_free_channel_gr_ctx(c);

	/* zcull_ctx, pm_ctx */

	/* wipe remaining ctx bookkeeping (also clears dangling pointers) */
	memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));

	c->num_objects = 0;
	c->first_init = false;
}
380 | |||
/*
 * Allocate and commit the full object context for a channel on first class
 * allocation: gr ctx buffer, patch buffer, global ctx buffer mappings, and
 * the golden context image. Subsequent calls on the same channel only bump
 * the object count path (currently rejected — see below).
 * Installed as gops->gr.alloc_obj_ctx. Returns 0 or a negative errno.
 */
static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
				struct nvhost_alloc_obj_ctx_args *args)
{
	struct gk20a *g = c->g;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct tsg_gk20a *tsg = NULL;
	int err = 0;

	gk20a_dbg_fn("");

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(dev_from_gk20a(g),
			   "not bound to address space at time"
			   " of grctx allocation");
		return -EINVAL;
	}

	if (!g->ops.gr.is_valid_class(g, args->class_num)) {
		gk20a_err(dev_from_gk20a(g),
			   "invalid obj class 0x%x", args->class_num);
		err = -EINVAL;
		goto out;
	}
	c->obj_class = args->class_num;

	/* FIXME: add TSG support */
	/* NOTE(review): tsg is looked up but never used below — placeholder
	 * for the TSG path above */
	if (gk20a_is_channel_marked_as_tsg(c))
		tsg = &f->tsg[c->tsgid];

	/* allocate gr ctx buffer */
	if (!ch_ctx->gr_ctx) {
		err = vgpu_gr_alloc_channel_gr_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate gr ctx buffer");
			goto out;
		}
	} else {
		/*TBD: needs to be more subtle about which is
		 * being allocated as some are allowed to be
		 * allocated along same channel */
		gk20a_err(dev_from_gk20a(g),
			"too many classes alloc'd on same channel");
		err = -EINVAL;
		goto out;
	}

	/* commit gr ctx buffer */
	err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.pages == NULL) {
		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = vgpu_gr_map_global_ctx_buffers(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to map global ctx buffer");
			goto out;
		}
		gr_gk20a_elpg_protected_call(g,
			vgpu_gr_commit_global_ctx_buffers(g, c, true));
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
			vgpu_gr_load_golden_ctx_image(g, c));
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}

	c->num_objects++;

	gk20a_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused so no need to release them.
	   2. golden image load is a one time thing so if
	   they pass, no need to undo. */
	gk20a_err(dev_from_gk20a(g), "fail");
	return err;
}
484 | |||
485 | static int vgpu_gr_free_obj_ctx(struct channel_gk20a *c, | ||
486 | struct nvhost_free_obj_ctx_args *args) | ||
487 | { | ||
488 | unsigned long timeout = gk20a_get_gr_idle_timeout(c->g); | ||
489 | |||
490 | gk20a_dbg_fn(""); | ||
491 | |||
492 | if (c->num_objects == 0) | ||
493 | return 0; | ||
494 | |||
495 | c->num_objects--; | ||
496 | |||
497 | if (c->num_objects == 0) { | ||
498 | c->first_init = false; | ||
499 | gk20a_disable_channel(c, | ||
500 | !c->has_timedout, | ||
501 | timeout); | ||
502 | } | ||
503 | |||
504 | return 0; | ||
505 | } | ||
506 | |||
/*
 * Populate the gr config (GPC/TPC counts) by querying the vgpu server,
 * then derive the chip-specific buffer-size defaults from them.
 * Returns 0 on success, -ENOMEM if any attribute query fails.
 */
static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
			&gr->max_tpc_per_gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
			&gr->max_tpc_count))
		return -ENOMEM;

	/* derive defaults that depend on the counts fetched above */
	g->ops.gr.bundle_cb_defaults(g);
	g->ops.gr.cb_size_default(g);
	g->ops.gr.calc_global_ctx_buffer_size(g);
	return 0;
}
532 | |||
/*
 * Bind a zcull context buffer (at zcull_va, with the given mode) to the
 * channel via the vgpu server. Installed as gops->gr.bind_ctxsw_zcull.
 * Returns 0 on success, -ENOMEM on transport or server failure.
 */
static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
				struct channel_gk20a *c, u64 zcull_va,
				u32 mode)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL;
	msg.handle = platform->virt_handle;
	p->handle = c->virt_ctx;
	p->zcull_va = zcull_va;
	p->mode = mode;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	return (err || msg.ret) ? -ENOMEM : 0;
}
553 | |||
/*
 * Fetch zcull geometry/configuration from the vgpu server and copy it into
 * the caller's gr_zcull_info. Installed as gops->gr.get_zcull_info.
 * Returns 0 on success, -ENOMEM on transport or server failure.
 */
static int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
				struct gr_zcull_info *zcull_params)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info;
	int err;

	gk20a_dbg_fn("");

	msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO;
	msg.handle = platform->virt_handle;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (err || msg.ret)
		return -ENOMEM;

	/* msg is reused for the reply; p now points at the server's data */
	zcull_params->width_align_pixels = p->width_align_pixels;
	zcull_params->height_align_pixels = p->height_align_pixels;
	zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots;
	zcull_params->aliquot_total = p->aliquot_total;
	zcull_params->region_byte_multiplier = p->region_byte_multiplier;
	zcull_params->region_header_size = p->region_header_size;
	zcull_params->subregion_header_size = p->subregion_header_size;
	zcull_params->subregion_width_align_pixels =
		p->subregion_width_align_pixels;
	zcull_params->subregion_height_align_pixels =
		p->subregion_height_align_pixels;
	zcull_params->subregion_count = p->subregion_count;

	return 0;
}
585 | |||
/*
 * Tear down gr software state set up by vgpu_gr_init_gr_setup_sw.
 * Installed as gr->remove_support.
 */
static void vgpu_remove_gr_support(struct gr_gk20a *gr)
{
	gk20a_dbg_fn("");

	gk20a_allocator_destroy(&gr->comp_tags);
}
592 | |||
/*
 * One-time software-side gr initialization for the virtual GPU: query the
 * gr config from the server, fetch ctx state sizes, set up comptags and
 * global ctx buffers. Idempotent via gr->sw_ready.
 * Returns 0 on success or a negative errno from the failing step.
 */
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int err;

	gk20a_dbg_fn("");

	if (gr->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	gr->g = g;

	err = vgpu_gr_init_gr_config(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_init_ctx_state(g, gr);
	if (err)
		goto clean_up;

	err = g->ops.ltc.init_comptags(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_alloc_global_ctx_buffers(g);
	if (err)
		goto clean_up;

	mutex_init(&gr->ctx_mutex);

	gr->remove_support = vgpu_remove_gr_support;
	gr->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	/* NOTE(review): this destroys gr->comp_tags even when the failure
	 * happened before init_comptags ran; presumably safe because g->gr
	 * is zero-initialized — confirm gk20a_allocator_destroy tolerates
	 * a zeroed allocator. */
	gk20a_err(dev_from_gk20a(g), "fail");
	vgpu_remove_gr_support(gr);
	return err;
}
636 | |||
/*
 * Public entry point for gr init on the virtual GPU; just runs the
 * software-side setup (there is no hardware to program from the guest).
 */
int vgpu_init_gr_support(struct gk20a *g)
{
	gk20a_dbg_fn("");

	return vgpu_gr_init_gr_setup_sw(g);
}
643 | |||
/* Decoded gr interrupt payload, mirroring the native driver's isr data.
 * Only chid is populated by vgpu_gr_isr today; the remaining fields are
 * kept for parity with the non-virtualized isr path. */
struct gr_isr_data {
	u32 addr;	/* trapped method address */
	u32 data_lo;	/* method data, low word */
	u32 data_hi;	/* method data, high word */
	u32 curr_ctx;	/* current context pointer */
	u32 chid;	/* channel id the interrupt belongs to */
	u32 offset;	/* method offset within the class */
	u32 sub_chan;	/* subchannel */
	u32 class_num;	/* class the method was sent to */
};
654 | |||
/*
 * Handle a "notify pending" gr interrupt: wake anyone waiting on the
 * channel's notifier wait queue. Always returns 0.
 */
static int vgpu_gr_handle_notify_pending(struct gk20a *g,
					struct gr_isr_data *isr_data)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = &f->channel[isr_data->chid];

	gk20a_dbg_fn("");
	wake_up(&ch->notifier_wq);
	return 0;
}
665 | |||
666 | int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) | ||
667 | { | ||
668 | struct gr_isr_data isr_data; | ||
669 | |||
670 | gk20a_dbg_fn(""); | ||
671 | |||
672 | isr_data.chid = info->chid; | ||
673 | |||
674 | if (info->type == TEGRA_VGPU_GR_INTR_NOTIFY) | ||
675 | vgpu_gr_handle_notify_pending(g, &isr_data); | ||
676 | |||
677 | return 0; | ||
678 | } | ||
679 | |||
/* Install the vgpu implementations of the gr HAL entry points. */
void vgpu_init_gr_ops(struct gpu_ops *gops)
{
	gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx;
	gops->gr.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx;
	gops->gr.free_obj_ctx = vgpu_gr_free_obj_ctx;
	gops->gr.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull;
	gops->gr.get_zcull_info = vgpu_gr_get_zcull_info;
}
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c new file mode 100644 index 00000000..ddff23b7 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * Virtualized GPU L2 | ||
3 | * | ||
4 | * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include "vgpu/vgpu.h" | ||
17 | |||
/*
 * Query the L2 cache size (in bytes) from the vgpu server.
 * Returns 0 and logs an error if the query fails.
 * NOTE(review): return type is int while the attribute is a u32 — an L2
 * size above INT_MAX would wrap negative; confirm against the HAL's
 * determine_L2_size_bytes signature.
 */
static int vgpu_determine_L2_size_bytes(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 cache_size = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_L2_SIZE, &cache_size))
		dev_err(dev_from_gk20a(g), "unable to get L2 size");

	return cache_size;
}
31 | |||
32 | static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | ||
33 | { | ||
34 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
35 | u32 max_comptag_lines = 0; | ||
36 | |||
37 | gk20a_dbg_fn(""); | ||
38 | |||
39 | vgpu_get_attribute(platform->virt_handle, | ||
40 | TEGRA_VGPU_ATTRIB_COMPTAG_LINES, &max_comptag_lines); | ||
41 | if (max_comptag_lines < 2) | ||
42 | return -ENXIO; | ||
43 | |||
44 | gk20a_allocator_init(&gr->comp_tags, "comptag", | ||
45 | 1, /* start */ | ||
46 | max_comptag_lines - 1, /* length*/ | ||
47 | 1); /* align */ | ||
48 | return 0; | ||
49 | } | ||
50 | |||
/* Install the vgpu implementations of the ltc HAL entry points. */
void vgpu_init_ltc_ops(struct gpu_ops *gops)
{
	gops->ltc.determine_L2_size_bytes = vgpu_determine_L2_size_bytes;
	gops->ltc.init_comptags = vgpu_ltc_init_comptags;
}
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c new file mode 100644 index 00000000..6ed1dece --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -0,0 +1,425 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Memory Management | ||
3 | * | ||
4 | * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/dma-mapping.h> | ||
17 | #include "vgpu/vgpu.h" | ||
18 | |||
/* note: keep the page sizes sorted lowest to highest here */
/* shifts must stay in lockstep with the sizes: 1 << 12 == SZ_4K,
 * 1 << 17 == SZ_128K */
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
22 | |||
/*
 * One-time software-side MM initialization for the virtual GPU: record
 * page-size parameters and the channel VA range. Idempotent via
 * mm->sw_ready. Always returns 0.
 */
static int vgpu_init_mm_setup_sw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;

	gk20a_dbg_fn("");

	if (mm->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	mm->g = g;
	mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
	mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
	/* one PDE covers 1024 big pages */
	mm->pde_stride = mm->big_page_size << 10;
	mm->pde_stride_shift = ilog2(mm->pde_stride);
	BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */

	/*TBD: make channel vm size configurable */
	mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;

	gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));

	mm->sw_ready = true;

	return 0;
}
50 | |||
/*
 * Public entry point for MM init on the virtual GPU; only software setup
 * is needed since the server owns the hardware.
 */
int vgpu_init_mm_support(struct gk20a *g)
{
	gk20a_dbg_fn("");

	return vgpu_init_mm_setup_sw(g);
}
57 | |||
58 | static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | ||
59 | u64 map_offset, | ||
60 | struct sg_table *sgt, | ||
61 | u64 buffer_offset, | ||
62 | u64 size, | ||
63 | int pgsz_idx, | ||
64 | u8 kind_v, | ||
65 | u32 ctag_offset, | ||
66 | u32 flags, | ||
67 | int rw_flag, | ||
68 | bool clear_ctags) | ||
69 | { | ||
70 | int err = 0; | ||
71 | struct device *d = dev_from_vm(vm); | ||
72 | struct gk20a *g = gk20a_from_vm(vm); | ||
73 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
74 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | ||
75 | struct tegra_vgpu_cmd_msg msg; | ||
76 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | ||
77 | u64 addr = gk20a_mm_iova_addr(sgt->sgl); | ||
78 | u8 prot; | ||
79 | |||
80 | gk20a_dbg_fn(""); | ||
81 | |||
82 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | ||
83 | if (!map_offset) { | ||
84 | map_offset = gk20a_vm_alloc_va(vm, size, | ||
85 | pgsz_idx); | ||
86 | if (!map_offset) { | ||
87 | gk20a_err(d, "failed to allocate va space"); | ||
88 | err = -ENOMEM; | ||
89 | goto fail; | ||
90 | } | ||
91 | } | ||
92 | |||
93 | if (rw_flag == gk20a_mem_flag_read_only) | ||
94 | prot = TEGRA_VGPU_MAP_PROT_READ_ONLY; | ||
95 | else if (rw_flag == gk20a_mem_flag_write_only) | ||
96 | prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY; | ||
97 | else | ||
98 | prot = TEGRA_VGPU_MAP_PROT_NONE; | ||
99 | |||
100 | msg.cmd = TEGRA_VGPU_CMD_AS_MAP; | ||
101 | msg.handle = platform->virt_handle; | ||
102 | p->handle = vm->handle; | ||
103 | p->addr = addr; | ||
104 | p->gpu_va = map_offset; | ||
105 | p->size = size; | ||
106 | p->pgsz_idx = pgsz_idx; | ||
107 | p->iova = mapping ? 1 : 0; | ||
108 | p->kind = kind_v; | ||
109 | p->cacheable = | ||
110 | (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0; | ||
111 | p->prot = prot; | ||
112 | p->ctag_offset = ctag_offset; | ||
113 | p->clear_ctags = clear_ctags; | ||
114 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
115 | if (err || msg.ret) | ||
116 | goto fail; | ||
117 | |||
118 | vm->tlb_dirty = true; | ||
119 | return map_offset; | ||
120 | fail: | ||
121 | gk20a_err(d, "%s: failed with err=%d\n", __func__, err); | ||
122 | return 0; | ||
123 | } | ||
124 | |||
/*
 * Unmap a GPU VA range: release the guest-side VA reservation (if this
 * mapping owned one) and ask the vgpu server to clear the PTEs.
 * Installed as gops->mm.gmmu_unmap (caller holds vm->update_gmmu_lock).
 */
static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
				u64 vaddr,
				u64 size,
				int pgsz_idx,
				bool va_allocated,
				int rw_flag)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
	int err;

	gk20a_dbg_fn("");

	if (va_allocated) {
		err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
		if (err) {
			dev_err(dev_from_vm(vm),
				"failed to free va");
			return;
		}
	}

	msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
	msg.handle = platform->virt_handle;
	p->handle = vm->handle;
	p->gpu_va = vaddr;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	/* void return: nothing to propagate, just log */
	if (err || msg.ret)
		dev_err(dev_from_vm(vm),
			"failed to update gmmu ptes on unmap");

	vm->tlb_dirty = true;
}
160 | |||
/*
 * Tear down a channel VM: unmap all mapped buffers, drop reserved VA
 * areas, release the server-side AS share, destroy the VA allocators and
 * finally free the vm itself. Installed as gops->mm.vm_remove.
 */
static void vgpu_vm_remove_support(struct vm_gk20a *vm)
{
	struct gk20a *g = vm->mm->g;
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct mapped_buffer_node *mapped_buffer;
	struct vm_reserved_va_node *va_node, *va_node_tmp;
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
	struct rb_node *node;
	int err;

	gk20a_dbg_fn("");
	mutex_lock(&vm->update_gmmu_lock);

	/* TBD: add a flag here for the unmap code to recognize teardown
	 * and short-circuit any otherwise expensive operations. */

	/* re-read the root each pass: unmap removes the node from the tree */
	node = rb_first(&vm->mapped_buffers);
	while (node) {
		mapped_buffer =
			container_of(node, struct mapped_buffer_node, node);
		gk20a_vm_unmap_locked(mapped_buffer);
		node = rb_first(&vm->mapped_buffers);
	}

	/* destroy remaining reserved memory areas */
	list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
		reserved_va_list) {
		list_del(&va_node->reserved_va_list);
		kfree(va_node);
	}

	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
	msg.handle = platform->virt_handle;
	p->handle = vm->handle;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	/* teardown path: nothing to unwind, just warn on failure */
	WARN_ON(err || msg.ret);

	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);

	mutex_unlock(&vm->update_gmmu_lock);

	/* release zero page if used */
	if (vm->zero_page_cpuva)
		dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
				  vm->zero_page_cpuva, vm->zero_page_iova);

	/* vm is not used anymore. release it. */
	kfree(vm);
}
212 | |||
/*
 * Map a buffer into BAR1 via the vgpu server; the server chooses the GPU
 * VA and returns it in the reply. Returns the BAR1 GPU VA, or 0 on failure.
 */
u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct dma_iommu_mapping *mapping =
			to_dma_iommu_mapping(dev_from_gk20a(g));
	u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
	int err;

	msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
	msg.handle = platform->virt_handle;
	p->addr = addr;
	p->size = size;
	p->iova = mapping ? 1 : 0;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	/* reply overwrites msg: p->gpu_va now holds the server-chosen VA */
	if (err || msg.ret)
		addr = 0;
	else
		addr = p->gpu_va;

	return addr;
}
236 | |||
237 | /* address space interfaces for the gk20a module */ | ||
238 | static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share) | ||
239 | { | ||
240 | struct gk20a_as *as = as_share->as; | ||
241 | struct gk20a *g = gk20a_from_as(as); | ||
242 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
243 | struct tegra_vgpu_cmd_msg msg; | ||
244 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; | ||
245 | struct mm_gk20a *mm = &g->mm; | ||
246 | struct vm_gk20a *vm; | ||
247 | u64 vma_size; | ||
248 | u32 num_pages, low_hole_pages; | ||
249 | char name[32]; | ||
250 | int err; | ||
251 | |||
252 | gk20a_dbg_fn(""); | ||
253 | |||
254 | vm = kzalloc(sizeof(*vm), GFP_KERNEL); | ||
255 | if (!vm) | ||
256 | return -ENOMEM; | ||
257 | |||
258 | as_share->vm = vm; | ||
259 | |||
260 | vm->mm = mm; | ||
261 | vm->as_share = as_share; | ||
262 | |||
263 | vm->big_pages = true; | ||
264 | |||
265 | vm->va_start = mm->pde_stride; /* create a one pde hole */ | ||
266 | vm->va_limit = mm->channel.size; /* note this means channel.size is | ||
267 | really just the max */ | ||
268 | |||
269 | msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; | ||
270 | msg.handle = platform->virt_handle; | ||
271 | p->size = vm->va_limit; | ||
272 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
273 | if (err || msg.ret) | ||
274 | return -ENOMEM; | ||
275 | |||
276 | vm->handle = p->handle; | ||
277 | |||
278 | /* low-half: alloc small pages */ | ||
279 | /* high-half: alloc big pages */ | ||
280 | vma_size = mm->channel.size >> 1; | ||
281 | |||
282 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
283 | gmmu_page_sizes[gmmu_page_size_small]>>10); | ||
284 | num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]); | ||
285 | |||
286 | /* num_pages above is without regard to the low-side hole. */ | ||
287 | low_hole_pages = (vm->va_start >> | ||
288 | gmmu_page_shifts[gmmu_page_size_small]); | ||
289 | |||
290 | gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name, | ||
291 | low_hole_pages, /* start */ | ||
292 | num_pages - low_hole_pages, /* length */ | ||
293 | 1); /* align */ | ||
294 | |||
295 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
296 | gmmu_page_sizes[gmmu_page_size_big]>>10); | ||
297 | |||
298 | num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]); | ||
299 | gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name, | ||
300 | num_pages, /* start */ | ||
301 | num_pages, /* length */ | ||
302 | 1); /* align */ | ||
303 | |||
304 | vm->mapped_buffers = RB_ROOT; | ||
305 | |||
306 | mutex_init(&vm->update_gmmu_lock); | ||
307 | kref_init(&vm->ref); | ||
308 | INIT_LIST_HEAD(&vm->reserved_va_list); | ||
309 | |||
310 | vm->enable_ctag = true; | ||
311 | |||
312 | return 0; | ||
313 | } | ||
314 | |||
/*
 * Bind a channel to an address-space share via the vgpu server.
 * Installed as gops->mm.vm_bind_channel.
 * Returns 0 on success, -ENOMEM on failure (ch->vm reset to NULL).
 */
static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
				struct channel_gk20a *ch)
{
	struct vm_gk20a *vm = as_share->vm;
	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
	int err;

	gk20a_dbg_fn("");

	ch->vm = vm;
	msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
	msg.handle = platform->virt_handle;
	p->as_handle = vm->handle;
	p->chan_handle = ch->virt_ctx;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	if (err || msg.ret) {
		/* undo the optimistic assignment above */
		ch->vm = NULL;
		err = -ENOMEM;
	}

	return err;
}
340 | |||
/*
 * Fire a cache-maintenance command (FB flush / L2 flush / L2 invalidate)
 * at the vgpu server. Best-effort: failures only trigger a WARN.
 */
static void vgpu_cache_maint(u64 handle, u8 op)
{
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
	int err;

	msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
	msg.handle = handle;
	p->op = op;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	WARN_ON(err || msg.ret);
}
353 | |||
/*
 * Flush the framebuffer via the server. Installed as gops->mm.fb_flush.
 * Always returns 0 (vgpu_cache_maint is best-effort).
 */
static int vgpu_mm_fb_flush(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);

	gk20a_dbg_fn("");

	vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
	return 0;
}
363 | |||
/*
 * Invalidate the L2 cache via the server.
 * Installed as gops->mm.l2_invalidate.
 */
static void vgpu_mm_l2_invalidate(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);

	gk20a_dbg_fn("");

	vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
}
372 | |||
373 | static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate) | ||
374 | { | ||
375 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
376 | u8 op; | ||
377 | |||
378 | gk20a_dbg_fn(""); | ||
379 | |||
380 | if (invalidate) | ||
381 | op = TEGRA_VGPU_L2_MAINT_FLUSH_INV; | ||
382 | else | ||
383 | op = TEGRA_VGPU_L2_MAINT_FLUSH; | ||
384 | |||
385 | vgpu_cache_maint(platform->virt_handle, op); | ||
386 | } | ||
387 | |||
/*
 * Invalidate the GPU TLB for this VM via the server, skipping the RPC when
 * no mapping has changed since the last invalidate (vm->tlb_dirty).
 * Installed as gops->mm.tlb_invalidate.
 */
static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
	int err;

	gk20a_dbg_fn("");

	/* No need to invalidate if tlb is clean */
	mutex_lock(&vm->update_gmmu_lock);
	if (!vm->tlb_dirty) {
		mutex_unlock(&vm->update_gmmu_lock);
		return;
	}

	msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
	msg.handle = platform->virt_handle;
	p->handle = vm->handle;
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	WARN_ON(err || msg.ret);
	/* cleared under the lock so concurrent map/unmap can't be lost */
	vm->tlb_dirty = false;
	mutex_unlock(&vm->update_gmmu_lock);
}
413 | |||
/* Install the vgpu implementations of the mm HAL entry points. */
void vgpu_init_mm_ops(struct gpu_ops *gops)
{
	gops->mm.gmmu_map = vgpu_locked_gmmu_map;
	gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
	gops->mm.vm_remove = vgpu_vm_remove_support;
	gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
	gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
	gops->mm.fb_flush = vgpu_mm_fb_flush;
	gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
	gops->mm.l2_flush = vgpu_mm_l2_flush;
	gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c new file mode 100644 index 00000000..cfe307ff --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c | |||
@@ -0,0 +1,416 @@ | |||
1 | /* | ||
2 | * Virtualized GPU | ||
3 | * | ||
4 | * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/kthread.h> | ||
17 | #include <linux/delay.h> | ||
18 | #include <linux/dma-mapping.h> | ||
19 | #include <linux/pm_runtime.h> | ||
20 | #include "vgpu/vgpu.h" | ||
21 | #include "gk20a/debug_gk20a.h" | ||
22 | #include "gk20a/hal_gk20a.h" | ||
23 | #include "gk20a/hw_mc_gk20a.h" | ||
24 | |||
/*
 * Bring up the gr_comm transport used to talk to the virtualization
 * server.
 *
 * NOTE(review): the literal 3 is passed separately from
 * ARRAY_SIZE(queue_sizes); its meaning depends on the
 * tegra_gr_comm_init() prototype (not visible here) — confirm it is
 * not a magic duplicate of the queue count.
 */
static inline int vgpu_comm_init(struct platform_device *pdev)
{
	size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };

	return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3,
				queue_sizes, TEGRA_VGPU_QUEUE_CMD,
				ARRAY_SIZE(queue_sizes));
}
33 | |||
/*
 * Tear down the gr_comm transport set up by vgpu_comm_init().
 * queue_sizes is re-declared only so ARRAY_SIZE() yields the same
 * queue count that was registered at init time.
 */
static inline void vgpu_comm_deinit(void)
{
	size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };

	tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD,
			ARRAY_SIZE(queue_sizes));
}
41 | |||
42 | int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in, | ||
43 | size_t size_out) | ||
44 | { | ||
45 | void *handle; | ||
46 | size_t size = size_in; | ||
47 | void *data = msg; | ||
48 | int err; | ||
49 | |||
50 | err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT, | ||
51 | tegra_gr_comm_get_server_vmid(), | ||
52 | TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size); | ||
53 | if (!err) { | ||
54 | WARN_ON(size < size_out); | ||
55 | memcpy(msg, data, size_out); | ||
56 | tegra_gr_comm_release(handle); | ||
57 | } | ||
58 | |||
59 | return err; | ||
60 | } | ||
61 | |||
62 | static u64 vgpu_connect(void) | ||
63 | { | ||
64 | struct tegra_vgpu_cmd_msg msg; | ||
65 | struct tegra_vgpu_connect_params *p = &msg.params.connect; | ||
66 | int err; | ||
67 | |||
68 | msg.cmd = TEGRA_VGPU_CMD_CONNECT; | ||
69 | p->module = TEGRA_VGPU_MODULE_GPU; | ||
70 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
71 | |||
72 | return (err || msg.ret) ? 0 : p->handle; | ||
73 | } | ||
74 | |||
75 | int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value) | ||
76 | { | ||
77 | struct tegra_vgpu_cmd_msg msg; | ||
78 | struct tegra_vgpu_attrib_params *p = &msg.params.attrib; | ||
79 | int err; | ||
80 | |||
81 | msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE; | ||
82 | msg.handle = handle; | ||
83 | p->attrib = attrib; | ||
84 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
85 | |||
86 | if (err || msg.ret) | ||
87 | return -1; | ||
88 | |||
89 | *value = p->value; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
/*
 * Kernel thread standing in for the hardware interrupt line: blocks on
 * the virtualization interrupt queue and dispatches each notification
 * from the server to the matching unit handler. Exits its receive loop
 * when an ABORT event arrives (posted by vgpu_remove_support()).
 */
static int vgpu_intr_thread(void *dev_id)
{
	struct gk20a *g = dev_id;

	while (true) {
		struct tegra_vgpu_intr_msg *msg;
		u32 sender;
		void *handle;
		size_t size;
		int err;

		err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT,
					TEGRA_VGPU_QUEUE_INTR, &handle,
					(void **)&msg, &size, &sender);
		if (WARN_ON(err))
			continue;

		/* Self-posted shutdown request; see vgpu_remove_support(). */
		if (msg->event == TEGRA_VGPU_EVENT_ABORT) {
			tegra_gr_comm_release(handle);
			break;
		}

		if (msg->unit == TEGRA_VGPU_INTR_GR)
			vgpu_gr_isr(g, &msg->info.gr_intr);

		/* Release the queue buffer backing msg after handling. */
		tegra_gr_comm_release(handle);
	}

	/* Park until kthread_stop() is invoked so the stop handshake
	 * completes cleanly. */
	while (!kthread_should_stop())
		msleep(10);
	return 0;
}
125 | |||
/*
 * Tear down a virtualized gk20a instance: release per-engine resources,
 * stop the interrupt receiver thread, and unmap BAR1.
 */
static void vgpu_remove_support(struct platform_device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct tegra_vgpu_intr_msg msg;
	int err;

	if (g->pmu.remove_support)
		g->pmu.remove_support(&g->pmu);

	if (g->gr.remove_support)
		g->gr.remove_support(&g->gr);

	if (g->fifo.remove_support)
		g->fifo.remove_support(&g->fifo);

	if (g->mm.remove_support)
		g->mm.remove_support(&g->mm);

	/* Post an ABORT event to ourselves so vgpu_intr_thread() wakes
	 * from its blocking receive and leaves its loop; only then can
	 * kthread_stop() below complete. */
	msg.event = TEGRA_VGPU_EVENT_ABORT;
	err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT,
				TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR,
				&msg, sizeof(msg));
	WARN_ON(err);
	kthread_stop(platform->intr_handler);

	/* free mappings to registers, etc*/

	if (g->bar1) {
		iounmap(g->bar1);
		g->bar1 = 0;
	}
}
159 | |||
160 | static int vgpu_init_support(struct platform_device *dev) | ||
161 | { | ||
162 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, 0); | ||
163 | struct gk20a *g = get_gk20a(dev); | ||
164 | int err = 0; | ||
165 | |||
166 | if (!r) { | ||
167 | dev_err(dev_from_gk20a(g), "faield to get gk20a bar1\n"); | ||
168 | err = -ENXIO; | ||
169 | goto fail; | ||
170 | } | ||
171 | |||
172 | g->bar1 = devm_request_and_ioremap(&dev->dev, r); | ||
173 | if (!g->bar1) { | ||
174 | dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n"); | ||
175 | err = -ENXIO; | ||
176 | goto fail; | ||
177 | } | ||
178 | |||
179 | mutex_init(&g->dbg_sessions_lock); | ||
180 | mutex_init(&g->client_lock); | ||
181 | |||
182 | g->remove_support = vgpu_remove_support; | ||
183 | return 0; | ||
184 | |||
185 | fail: | ||
186 | vgpu_remove_support(dev); | ||
187 | return err; | ||
188 | } | ||
189 | |||
190 | int vgpu_pm_prepare_poweroff(struct device *dev) | ||
191 | { | ||
192 | struct platform_device *pdev = to_platform_device(dev); | ||
193 | struct gk20a *g = get_gk20a(pdev); | ||
194 | int ret = 0; | ||
195 | |||
196 | gk20a_dbg_fn(""); | ||
197 | |||
198 | if (!g->power_on) | ||
199 | return 0; | ||
200 | |||
201 | ret = gk20a_channel_suspend(g); | ||
202 | if (ret) | ||
203 | return ret; | ||
204 | |||
205 | g->power_on = false; | ||
206 | |||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | static void vgpu_detect_chip(struct gk20a *g) | ||
211 | { | ||
212 | struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics; | ||
213 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
214 | |||
215 | u32 mc_boot_0_value; | ||
216 | |||
217 | if (vgpu_get_attribute(platform->virt_handle, | ||
218 | TEGRA_VGPU_ATTRIB_PMC_BOOT_0, | ||
219 | &mc_boot_0_value)) { | ||
220 | gk20a_err(dev_from_gk20a(g), "failed to detect chip"); | ||
221 | return; | ||
222 | } | ||
223 | |||
224 | gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) << | ||
225 | NVHOST_GPU_ARCHITECTURE_SHIFT; | ||
226 | gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value); | ||
227 | gpu->rev = | ||
228 | (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) | | ||
229 | mc_boot_0_minor_revision_v(mc_boot_0_value); | ||
230 | |||
231 | gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", | ||
232 | g->gpu_characteristics.arch, | ||
233 | g->gpu_characteristics.impl, | ||
234 | g->gpu_characteristics.rev); | ||
235 | } | ||
236 | |||
/*
 * Select and install the HAL for the detected chip: start from the
 * native gk20a ops, then override the engine ops that must be routed
 * through the virtualization interface.
 *
 * Returns 0 on success, -ENODEV for an unrecognized chip id.
 */
static int vgpu_init_hal(struct gk20a *g)
{
	/* arch occupies the high bits (see NVHOST_GPU_ARCHITECTURE_SHIFT
	 * in vgpu_detect_chip()), so arch + impl forms the combined id. */
	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;

	switch (ver) {
	case GK20A_GPUID_GK20A:
		gk20a_dbg_info("gk20a detected");
		/* init gk20a ops then override with virt extensions */
		gk20a_init_hal(&g->ops);
		vgpu_init_fifo_ops(&g->ops);
		vgpu_init_gr_ops(&g->ops);
		vgpu_init_ltc_ops(&g->ops);
		vgpu_init_mm_ops(&g->ops);
		break;
	default:
		gk20a_err(&g->dev->dev, "no support for %x", ver);
		return -ENODEV;
	}

	return 0;
}
258 | |||
259 | int vgpu_pm_finalize_poweron(struct device *dev) | ||
260 | { | ||
261 | struct platform_device *pdev = to_platform_device(dev); | ||
262 | struct gk20a *g = get_gk20a(pdev); | ||
263 | int err; | ||
264 | |||
265 | gk20a_dbg_fn(""); | ||
266 | |||
267 | if (g->power_on) | ||
268 | return 0; | ||
269 | |||
270 | g->power_on = true; | ||
271 | |||
272 | vgpu_detect_chip(g); | ||
273 | err = vgpu_init_hal(g); | ||
274 | if (err) | ||
275 | goto done; | ||
276 | |||
277 | err = vgpu_init_mm_support(g); | ||
278 | if (err) { | ||
279 | gk20a_err(dev, "failed to init gk20a mm"); | ||
280 | goto done; | ||
281 | } | ||
282 | |||
283 | err = vgpu_init_fifo_support(g); | ||
284 | if (err) { | ||
285 | gk20a_err(dev, "failed to init gk20a fifo"); | ||
286 | goto done; | ||
287 | } | ||
288 | |||
289 | err = vgpu_init_gr_support(g); | ||
290 | if (err) { | ||
291 | gk20a_err(dev, "failed to init gk20a gr"); | ||
292 | goto done; | ||
293 | } | ||
294 | |||
295 | err = gk20a_init_gpu_characteristics(g); | ||
296 | if (err) { | ||
297 | gk20a_err(dev, "failed to init gk20a gpu characteristics"); | ||
298 | goto done; | ||
299 | } | ||
300 | |||
301 | gk20a_channel_resume(g); | ||
302 | |||
303 | done: | ||
304 | return err; | ||
305 | } | ||
306 | |||
307 | static int vgpu_pm_init(struct platform_device *dev) | ||
308 | { | ||
309 | int err = 0; | ||
310 | |||
311 | gk20a_dbg_fn(""); | ||
312 | |||
313 | pm_runtime_enable(&dev->dev); | ||
314 | return err; | ||
315 | } | ||
316 | |||
317 | int vgpu_probe(struct platform_device *dev) | ||
318 | { | ||
319 | struct gk20a *gk20a; | ||
320 | int err; | ||
321 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
322 | |||
323 | if (!platform) { | ||
324 | dev_err(&dev->dev, "no platform data\n"); | ||
325 | return -ENODATA; | ||
326 | } | ||
327 | |||
328 | gk20a_dbg_fn(""); | ||
329 | |||
330 | gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); | ||
331 | if (!gk20a) { | ||
332 | dev_err(&dev->dev, "couldn't allocate gk20a support"); | ||
333 | return -ENOMEM; | ||
334 | } | ||
335 | |||
336 | platform->g = gk20a; | ||
337 | gk20a->dev = dev; | ||
338 | |||
339 | err = gk20a_user_init(dev); | ||
340 | if (err) | ||
341 | return err; | ||
342 | |||
343 | vgpu_init_support(dev); | ||
344 | |||
345 | init_rwsem(&gk20a->busy_lock); | ||
346 | |||
347 | spin_lock_init(&gk20a->mc_enable_lock); | ||
348 | |||
349 | /* Initialize the platform interface. */ | ||
350 | err = platform->probe(dev); | ||
351 | if (err) { | ||
352 | dev_err(&dev->dev, "platform probe failed"); | ||
353 | return err; | ||
354 | } | ||
355 | |||
356 | err = vgpu_pm_init(dev); | ||
357 | if (err) { | ||
358 | dev_err(&dev->dev, "pm init failed"); | ||
359 | return err; | ||
360 | } | ||
361 | |||
362 | if (platform->late_probe) { | ||
363 | err = platform->late_probe(dev); | ||
364 | if (err) { | ||
365 | dev_err(&dev->dev, "late probe failed"); | ||
366 | return err; | ||
367 | } | ||
368 | } | ||
369 | |||
370 | err = vgpu_comm_init(dev); | ||
371 | if (err) { | ||
372 | dev_err(&dev->dev, "failed to init comm interface\n"); | ||
373 | return -ENOSYS; | ||
374 | } | ||
375 | |||
376 | platform->virt_handle = vgpu_connect(); | ||
377 | if (!platform->virt_handle) { | ||
378 | dev_err(&dev->dev, "failed to connect to server node\n"); | ||
379 | vgpu_comm_deinit(); | ||
380 | return -ENOSYS; | ||
381 | } | ||
382 | |||
383 | platform->intr_handler = kthread_run(vgpu_intr_thread, gk20a, "gk20a"); | ||
384 | if (IS_ERR(platform->intr_handler)) | ||
385 | return -ENOMEM; | ||
386 | |||
387 | gk20a_debug_init(dev); | ||
388 | |||
389 | /* Set DMA parameters to allow larger sgt lists */ | ||
390 | dev->dev.dma_parms = &gk20a->dma_parms; | ||
391 | dma_set_max_seg_size(&dev->dev, UINT_MAX); | ||
392 | |||
393 | gk20a->gr_idle_timeout_default = | ||
394 | CONFIG_GK20A_DEFAULT_TIMEOUT; | ||
395 | gk20a->timeouts_enabled = true; | ||
396 | |||
397 | gk20a_create_sysfs(dev); | ||
398 | gk20a_init_gr(gk20a); | ||
399 | |||
400 | return 0; | ||
401 | } | ||
402 | |||
/*
 * Remove entry point: run the full teardown hook installed by
 * vgpu_init_support(), shut down the comm channel, and release the
 * per-device state allocated in vgpu_probe().
 */
int vgpu_remove(struct platform_device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	gk20a_dbg_fn("");

	if (g->remove_support)
		g->remove_support(dev);

	vgpu_comm_deinit();
	gk20a_user_deinit(dev);
	/* Clear the platform back-pointer before freeing g to avoid a
	 * dangling reference. */
	gk20a_get_platform(dev)->g = NULL;
	kfree(g);
	return 0;
}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h new file mode 100644 index 00000000..445a1c90 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/vgpu.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Interfaces | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef _VIRT_H_ | ||
17 | #define _VIRT_H_ | ||
18 | |||
19 | #include <linux/tegra_gr_comm.h> | ||
20 | #include <linux/tegra_vgpu.h> | ||
21 | #include "gk20a/gk20a.h" | ||
22 | |||
23 | int vgpu_pm_prepare_poweroff(struct device *dev); | ||
24 | int vgpu_pm_finalize_poweron(struct device *dev); | ||
25 | int vgpu_probe(struct platform_device *dev); | ||
26 | int vgpu_remove(struct platform_device *dev); | ||
27 | u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size); | ||
28 | int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info); | ||
29 | void vgpu_init_fifo_ops(struct gpu_ops *gops); | ||
30 | void vgpu_init_gr_ops(struct gpu_ops *gops); | ||
31 | void vgpu_init_ltc_ops(struct gpu_ops *gops); | ||
32 | void vgpu_init_mm_ops(struct gpu_ops *gops); | ||
33 | int vgpu_init_mm_support(struct gk20a *g); | ||
34 | int vgpu_init_gr_support(struct gk20a *g); | ||
35 | int vgpu_init_fifo_support(struct gk20a *g); | ||
36 | |||
37 | int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value); | ||
38 | int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in, | ||
39 | size_t size_out); | ||
40 | |||
41 | #endif | ||