-rw-r--r--  drivers/gpu/nvgpu/Makefile                    |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/Makefile              |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c            |   5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       |  89
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h       |  11
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c          |  10
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c          |   9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c               |  46
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h               |  49
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c            |   7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c            | 115
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h            |  35
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h      |   9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c |  64
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c          |   9
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c            |   5
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c            |   9
-rw-r--r--  drivers/gpu/nvgpu/vgpu/Makefile               |  10
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c            | 569
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gr_vgpu.c              | 687
-rw-r--r--  drivers/gpu/nvgpu/vgpu/ltc_vgpu.c             |  55
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c              | 425
-rw-r--r--  drivers/gpu/nvgpu/vgpu/vgpu.c                 | 416
-rw-r--r--  drivers/gpu/nvgpu/vgpu/vgpu.h                 |  41
24 files changed, 2576 insertions, 101 deletions
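The change below routes the fifo, gr, and mm entry points through the per-GPU gpu_ops function-pointer table (see the gk20a.h hunks) so that the new vgpu/ backend can supply RPC-based implementations while gk20a/gm20b keep their register-level ones. What follows is a minimal, self-contained model of that dispatch pattern only; it is not driver code, and names such as struct fifo_ops, native_preempt and vgpu_preempt are invented for illustration.

#include <stdio.h>

/* Simplified stand-in for the per-GPU HAL table (struct gpu_ops in gk20a.h). */
struct fifo_ops {
	int (*preempt_channel)(int hw_chid);
	void (*disable_channel)(int hw_chid);
};

struct gpu {
	struct fifo_ops fifo;
};

/* "Native" backend: would poke hardware registers directly (modeled as prints). */
static int native_preempt(int hw_chid)
{
	printf("native: preempt channel %d via registers\n", hw_chid);
	return 0;
}

static void native_disable(int hw_chid)
{
	printf("native: disable channel %d via ccsr write\n", hw_chid);
}

/* "Virtual" backend: would forward the request to a server instead. */
static int vgpu_preempt(int hw_chid)
{
	printf("vgpu: send preempt request for channel %d\n", hw_chid);
	return 0;
}

static void vgpu_disable(int hw_chid)
{
	printf("vgpu: send disable request for channel %d\n", hw_chid);
}

/* Mirrors the role of gk20a_init_fifo()/gm20b_init_fifo(): each backend fills the table. */
static void init_native(struct gpu *g)
{
	g->fifo.preempt_channel = native_preempt;
	g->fifo.disable_channel = native_disable;
}

static void init_vgpu(struct gpu *g)
{
	g->fifo.preempt_channel = vgpu_preempt;
	g->fifo.disable_channel = vgpu_disable;
}

int main(void)
{
	struct gpu native, virt;

	init_native(&native);
	init_vgpu(&virt);

	/* Call sites dispatch through the table, as in c->g->ops.fifo.preempt_channel(...). */
	native.fifo.preempt_channel(3);
	virt.fifo.preempt_channel(3);
	native.fifo.disable_channel(3);
	virt.fifo.disable_channel(3);
	return 0;
}

In the real patch the compile-time CONFIG_TEGRA_GR_VIRTUALIZATION guards only cover vgpu-specific fields and the probe hand-off; the actual backend selection happens at runtime through this table.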
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 0fb6090a..6544b315 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -5,3 +5,4 @@ ccflags-y += -Werror
 
 obj-$(CONFIG_GK20A) += gk20a/
 obj-$(CONFIG_GK20A) += gm20b/
+obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += vgpu/
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
index aa9237b4..fbc9cbec 100644
--- a/drivers/gpu/nvgpu/gk20a/Makefile
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -39,5 +39,6 @@ nvgpu-y := \
 	tsg_gk20a.o
 nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += sync_gk20a.o
+nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += platform_vgpu_tegra.o
 
 obj-$(CONFIG_GK20A) := nvgpu.o
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 4849dbd5..1a1ca8ff 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -40,6 +40,7 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 static int gk20a_as_alloc_share(struct gk20a_as *as,
 				struct gk20a_as_share **out)
 {
+	struct gk20a *g = gk20a_from_as(as);
 	struct gk20a_as_share *as_share;
 	int err = 0;
 
@@ -55,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as,
 	as_share->ref_cnt.counter = 1;
 
 	/* this will set as_share->vm. */
-	err = gk20a_vm_alloc_share(as_share);
+	err = g->ops.mm.vm_alloc_share(as_share);
 	if (err)
 		goto failed;
 
@@ -106,7 +107,7 @@ static int gk20a_as_ioctl_bind_channel(
 	atomic_inc(&as_share->ref_cnt);
 
 	/* this will set channel_gk20a->vm */
-	err = gk20a_vm_bind_channel(as_share, ch);
+	err = ch->g->ops.mm.vm_bind_channel(as_share, ch);
 	if (err) {
 		atomic_dec(&as_share->ref_cnt);
 		return err;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 45757884..669ec294 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -56,16 +56,9 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
 
 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
-			u64 gpfifo_base, u32 gpfifo_entries);
 
 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
 
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-			struct channel_gk20a *ch);
-static void channel_gk20a_free_inst(struct gk20a *g,
-			struct channel_gk20a *ch);
-
 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
 			bool add);
 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
@@ -173,12 +166,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		return -ENOMEM;
 
 	/* disable channel */
-	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
-		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
-		ccsr_channel_enable_clr_true_f());
+	c->g->ops.fifo.disable_channel(c);
 
 	/* preempt the channel */
-	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
+	WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
 
 	/* value field is 8 bits long */
 	while (value >= 1 << 8) {
@@ -206,8 +197,8 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 	return 0;
 }
 
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 			u64 gpfifo_base, u32 gpfifo_entries)
 {
 	void *inst_ptr;
 
@@ -269,7 +260,7 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
-	return 0;
+	return channel_gk20a_commit_userd(c);
 }
 
 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
@@ -347,8 +338,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 	}
 }
 
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-			struct channel_gk20a *ch)
+int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
 	struct device *d = dev_from_gk20a(g);
 	int err = 0;
@@ -384,12 +374,11 @@ static int channel_gk20a_alloc_inst(struct gk20a *g,
 
 clean_up:
 	gk20a_err(d, "fail");
-	channel_gk20a_free_inst(g, ch);
+	g->ops.fifo.free_inst(g, ch);
 	return err;
 }
 
-static void channel_gk20a_free_inst(struct gk20a *g,
-			struct channel_gk20a *ch)
+void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
 	struct device *d = dev_from_gk20a(g);
 
@@ -403,7 +392,16 @@ static void channel_gk20a_free_inst(struct gk20a *g,
 
 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
 {
-	return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
+	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
+}
+
+void channel_gk20a_disable(struct channel_gk20a *ch)
+{
+	/* disable channel */
+	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+		gk20a_readl(ch->g,
+			ccsr_channel_r(ch->hw_chid)) |
+		ccsr_channel_enable_clr_true_f());
 }
 
 void gk20a_channel_abort(struct channel_gk20a *ch)
@@ -426,11 +424,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch)
 	}
 	mutex_unlock(&ch->jobs_lock);
 
-	/* disable channel */
-	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
-		gk20a_readl(ch->g,
-			ccsr_channel_r(ch->hw_chid)) |
-		ccsr_channel_enable_clr_true_f());
+	ch->g->ops.fifo.disable_channel(ch);
 
 	if (released_job_semaphore) {
 		wake_up_interruptible_all(&ch->semaphore_wq);
@@ -479,7 +473,7 @@ void gk20a_disable_channel(struct channel_gk20a *ch,
 	gk20a_wait_channel_idle(ch);
 
 	/* preempt the channel */
-	gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
+	ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
 
 	/* remove channel from runlist */
 	channel_gk20a_update_runlist(ch, false);
@@ -643,7 +637,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 	gk20a_free_error_notifiers(ch);
 
 	/* release channel ctx */
-	gk20a_free_channel_ctx(ch);
+	g->ops.gr.free_channel_ctx(ch);
 
 	gk20a_gr_flush_channel_tlb(gr);
 
@@ -683,8 +677,8 @@ unbind:
 	if (gk20a_is_channel_marked_as_tsg(ch))
 		gk20a_tsg_unbind_channel(ch);
 
-	channel_gk20a_unbind(ch);
-	channel_gk20a_free_inst(g, ch);
+	g->ops.fifo.unbind_channel(ch);
+	g->ops.fifo.free_inst(g, ch);
 
 	ch->vpr = false;
 	ch->vm = NULL;
@@ -747,7 +741,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 
 	ch->g = g;
 
-	if (channel_gk20a_alloc_inst(g, ch)) {
+	if (g->ops.fifo.alloc_inst(g, ch)) {
 		ch->in_use = false;
 		gk20a_err(dev_from_gk20a(g),
 			"failed to open gk20a channel, out of inst mem");
@@ -1097,7 +1091,6 @@ static void recycle_priv_cmdbuf(struct channel_gk20a *c)
 	gk20a_dbg_fn("done");
 }
 
-
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvhost_alloc_gpfifo_args *args)
 {
@@ -1181,10 +1174,11 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
 		c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
 
-	channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
-
 	channel_gk20a_setup_userd(c);
-	channel_gk20a_commit_userd(c);
+
+	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+	if (err)
+		goto clean_up_unmap;
 
 	/* TBD: setup engine contexts */
 
@@ -1550,7 +1544,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	/* We don't know what context is currently running... */
 	/* Note also: there can be more than one context associated with the */
 	/* address space (vm). */
-	gk20a_mm_tlb_invalidate(c->vm);
+	g->ops.mm.tlb_invalidate(c->vm);
 
 	/* Make sure we have enough space for gpfifo entries. If not,
 	 * wait for signals from completed submits */
@@ -1929,7 +1923,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
 
 	gk20a_dbg_fn("");
 
-	return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
+	return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
 			args->gpu_va, args->mode);
 }
 
@@ -1945,7 +1939,7 @@ int gk20a_channel_suspend(struct gk20a *g)
 	gk20a_dbg_fn("");
 
 	/* wait for engine idle */
-	err = gk20a_fifo_wait_engine_idle(g);
+	err = g->ops.fifo.wait_engine_idle(g);
 	if (err)
 		return err;
 
@@ -1954,22 +1948,20 @@ int gk20a_channel_suspend(struct gk20a *g)
 
 			gk20a_dbg_info("suspend channel %d", chid);
 			/* disable channel */
-			gk20a_writel(g, ccsr_channel_r(chid),
-				gk20a_readl(g, ccsr_channel_r(chid)) |
-				ccsr_channel_enable_clr_true_f());
+			g->ops.fifo.disable_channel(&f->channel[chid]);
 			/* preempt the channel */
-			gk20a_fifo_preempt_channel(g, chid);
+			g->ops.fifo.preempt_channel(g, chid);
 
 			channels_in_use = true;
 		}
 	}
 
 	if (channels_in_use) {
-		gk20a_fifo_update_runlist(g, 0, ~0, false, true);
+		g->ops.fifo.update_runlist(g, 0, ~0, false, true);
 
 		for (chid = 0; chid < f->num_channels; chid++) {
 			if (f->channel[chid].in_use)
-				channel_gk20a_unbind(&f->channel[chid]);
+				g->ops.fifo.unbind_channel(&f->channel[chid]);
 		}
 	}
 
@@ -1996,7 +1988,7 @@ int gk20a_channel_resume(struct gk20a *g)
 	}
 
 	if (channels_in_use)
-		gk20a_fifo_update_runlist(g, 0, ~0, true, true);
+		g->ops.fifo.update_runlist(g, 0, ~0, true, true);
 
 	gk20a_dbg_fn("done");
 	return 0;
@@ -2074,6 +2066,11 @@ clean_up:
 void gk20a_init_channel(struct gpu_ops *gops)
 {
 	gops->fifo.bind_channel = channel_gk20a_bind;
+	gops->fifo.unbind_channel = channel_gk20a_unbind;
+	gops->fifo.disable_channel = channel_gk20a_disable;
+	gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
+	gops->fifo.free_inst = channel_gk20a_free_inst;
+	gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
 }
 
 long gk20a_channel_ioctl(struct file *filp,
@@ -2144,7 +2141,7 @@ long gk20a_channel_ioctl(struct file *filp,
 				__func__, cmd);
 			return err;
 		}
-		err = gk20a_alloc_obj_ctx(ch,
+		err = ch->g->ops.gr.alloc_obj_ctx(ch,
 				(struct nvhost_alloc_obj_ctx_args *)buf);
 		gk20a_idle(dev);
 		break;
@@ -2156,7 +2153,7 @@ long gk20a_channel_ioctl(struct file *filp,
 				__func__, cmd);
 			return err;
 		}
-		err = gk20a_free_obj_ctx(ch,
+		err = ch->g->ops.gr.free_obj_ctx(ch,
 				(struct nvhost_free_obj_ctx_args *)buf);
 		gk20a_idle(dev);
 		break;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 2ea3eccb..37ca8244 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -144,6 +144,10 @@ struct channel_gk20a {
 	void *error_notifier_va;
 
 	struct gk20a_channel_sync *sync;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 virt_ctx;
+#endif
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
@@ -193,4 +197,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvhost_alloc_gpfifo_args *args);
 
+void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
+void channel_gk20a_disable(struct channel_gk20a *ch);
+int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch);
+void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch);
+int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+		u64 gpfifo_base, u32 gpfifo_entries);
+
 #endif /*__CHANNEL_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index e5628c3f..7338f842 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -158,6 +158,9 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 	struct zbc_entry *zbc_val;
 	struct zbc_query_params *zbc_tbl;
 	int i, err = 0;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	struct gk20a_platform *platform = platform_get_drvdata(dev);
+#endif
 
 	gk20a_dbg_fn("");
 
@@ -197,7 +200,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		if (zcull_info == NULL)
 			return -ENOMEM;
 
-		err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info);
+		err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
 		if (err) {
 			kfree(zcull_info);
 			break;
@@ -219,6 +222,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 	case NVHOST_GPU_IOCTL_ZBC_SET_TABLE:
 		set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf;
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+		if (platform->virtual_dev)
+			return -ENOMEM;
+#endif
+
 		zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
 		if (zbc_val == NULL)
 			return -ENOMEM;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 4363129d..e6b3fd5f 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1173,7 +1173,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 
 		gk20a_channel_abort(ch);
 		for (i = 0; i < g->fifo.max_runlists; i++)
-			gk20a_fifo_update_runlist(g, i,
+			g->ops.fifo.update_runlist(g, i,
 					hw_chid, false, false);
 
 		if (gk20a_fifo_set_ctx_mmu_error(g, ch))
@@ -1620,7 +1620,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g,
 	pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
 
 	if (pbdma_chid != ~0) {
-		err = gk20a_fifo_preempt_channel(g, pbdma_chid);
+		err = g->ops.fifo.preempt_channel(g, pbdma_chid);
 		if (err)
 			goto clean_up;
 	}
@@ -1636,7 +1636,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g,
 	engine_chid = fifo_engine_status_next_id_v(eng_stat);
 
 	if (engine_chid != ~0 && engine_chid != pbdma_chid) {
-		err = gk20a_fifo_preempt_channel(g, engine_chid);
+		err = g->ops.fifo.preempt_channel(g, engine_chid);
 		if (err)
 			goto clean_up;
 	}
@@ -1960,6 +1960,9 @@ static void gk20a_fifo_apply_pb_timeout(struct gk20a *g)
 void gk20a_init_fifo(struct gpu_ops *gops)
 {
 	gk20a_init_channel(gops);
+	gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
+	gops->fifo.update_runlist = gk20a_fifo_update_runlist;
 	gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault;
 	gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout;
+	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0816878a..3499cc89 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -40,6 +40,7 @@
 #include <linux/tegra-powergate.h>
 #include <linux/tegra_pm_domains.h>
 #include <linux/clk/tegra.h>
+#include <linux/kthread.h>
 
 #include <linux/sched.h>
 #include <linux/input-cfboost.h>
@@ -57,6 +58,9 @@
 #include "dbg_gpu_gk20a.h"
 #include "hal.h"
 #include "nvhost_acm.h"
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+#include "vgpu/vgpu.h"
+#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/gk20a.h>
@@ -737,6 +741,17 @@ static int gk20a_init_client(struct platform_device *dev)
 
 	gk20a_dbg_fn("");
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{
+		struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+		if (platform->virtual_dev) {
+			err = vgpu_pm_finalize_poweron(&dev->dev);
+			if (err)
+				return err;
+		}
+	}
+#endif
 #ifndef CONFIG_PM_RUNTIME
 	gk20a_pm_finalize_poweron(&dev->dev);
 #endif
@@ -753,6 +768,16 @@ static int gk20a_init_client(struct platform_device *dev)
 static void gk20a_deinit_client(struct platform_device *dev)
 {
 	gk20a_dbg_fn("");
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{
+		struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+		if (platform->virtual_dev) {
+			vgpu_pm_prepare_poweroff(&dev->dev);
+			return;
+		}
+	}
+#endif
 #ifndef CONFIG_PM_RUNTIME
 	gk20a_pm_prepare_poweroff(&dev->dev);
 #endif
@@ -1006,6 +1031,10 @@ static struct of_device_id tegra_gk20a_of_match[] = {
 		.data = &gk20a_tegra_platform },
 	{ .compatible = "nvidia,tegra210-gm20b",
 		.data = &gm20b_tegra_platform },
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{ .compatible = "nvidia,tegra124-gk20a-vgpu",
+		.data = &vgpu_tegra_platform },
+#endif
 #else
 	{ .compatible = "nvidia,tegra124-gk20a",
 		.data = &gk20a_generic_platform },
@@ -1057,7 +1086,7 @@ static int gk20a_create_device(
 	return 0;
 }
 
-static void gk20a_user_deinit(struct platform_device *dev)
+void gk20a_user_deinit(struct platform_device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
 
@@ -1098,7 +1127,7 @@ static void gk20a_user_deinit(struct platform_device *dev)
 	class_destroy(g->class);
 }
 
-static int gk20a_user_init(struct platform_device *dev)
+int gk20a_user_init(struct platform_device *dev)
 {
 	int err;
 	dev_t devno;
@@ -1403,6 +1432,11 @@ static int gk20a_probe(struct platform_device *dev)
 
 	platform_set_drvdata(dev, platform);
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	if (platform->virtual_dev)
+		return vgpu_probe(dev);
+#endif
+
 	gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
 	if (!gk20a) {
 		dev_err(&dev->dev, "couldn't allocate gk20a support");
@@ -1546,8 +1580,16 @@ static int gk20a_probe(struct platform_device *dev)
 static int __exit gk20a_remove(struct platform_device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+#endif
 	gk20a_dbg_fn("");
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	if (platform->virtual_dev)
+		return vgpu_remove(dev);
+#endif
+
 #ifdef CONFIG_INPUT_CFBOOST
 	if (g->boost_added)
 		cfb_remove_device(&dev->dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a1080f0b..b813541a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -131,6 +131,16 @@ struct gpu_ops {
 				    u32 reg_offset);
 		int (*load_ctxsw_ucode)(struct gk20a *g);
 		u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
+		void (*free_channel_ctx)(struct channel_gk20a *c);
+		int (*alloc_obj_ctx)(struct channel_gk20a *c,
+				struct nvhost_alloc_obj_ctx_args *args);
+		int (*free_obj_ctx)(struct channel_gk20a *c,
+				struct nvhost_free_obj_ctx_args *args);
+		int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
+				struct channel_gk20a *c, u64 zcull_va,
+				u32 mode);
+		int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
+				struct gr_zcull_info *zcull_params);
 	} gr;
 	const char *name;
 	struct {
@@ -148,9 +158,20 @@ struct gpu_ops {
 	} clock_gating;
 	struct {
 		void (*bind_channel)(struct channel_gk20a *ch_gk20a);
+		void (*unbind_channel)(struct channel_gk20a *ch_gk20a);
+		void (*disable_channel)(struct channel_gk20a *ch);
+		int (*alloc_inst)(struct gk20a *g, struct channel_gk20a *ch);
+		void (*free_inst)(struct gk20a *g, struct channel_gk20a *ch);
+		int (*setup_ramfc)(struct channel_gk20a *c, u64 gpfifo_base,
+				u32 gpfifo_entries);
+		int (*preempt_channel)(struct gk20a *g, u32 hw_chid);
+		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
+				u32 hw_chid, bool add,
+				bool wait_for_finish);
 		void (*trigger_mmu_fault)(struct gk20a *g,
 				unsigned long engine_ids);
 		void (*apply_pb_timeout)(struct gk20a *g);
+		int (*wait_engine_idle)(struct gk20a *g);
 	} fifo;
 	struct pmu_v {
 		/*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -241,6 +262,31 @@ struct gpu_ops {
 		void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
 			       u64 size, u32 pgsz_idx);
 		bool (*is_debug_mode_enabled)(struct gk20a *g);
+		u64 (*gmmu_map)(struct vm_gk20a *vm,
+				u64 map_offset,
+				struct sg_table *sgt,
+				u64 buffer_offset,
+				u64 size,
+				int pgsz_idx,
+				u8 kind_v,
+				u32 ctag_offset,
+				u32 flags,
+				int rw_flag,
+				bool clear_ctags);
+		void (*gmmu_unmap)(struct vm_gk20a *vm,
+				u64 vaddr,
+				u64 size,
+				int pgsz_idx,
+				bool va_allocated,
+				int rw_flag);
+		void (*vm_remove)(struct vm_gk20a *vm);
+		int (*vm_alloc_share)(struct gk20a_as_share *as_share);
+		int (*vm_bind_channel)(struct gk20a_as_share *as_share,
+				struct channel_gk20a *ch);
+		int (*fb_flush)(struct gk20a *g);
+		void (*l2_invalidate)(struct gk20a *g);
+		void (*l2_flush)(struct gk20a *g, bool invalidate);
+		void (*tlb_invalidate)(struct vm_gk20a *vm);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
@@ -648,4 +694,7 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name);
 
 int gk20a_init_gpu_characteristics(struct gk20a *g);
 
+int gk20a_user_init(struct platform_device *dev);
+void gk20a_user_deinit(struct platform_device *dev);
+
 #endif /* _NVHOST_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ef7776df..892a138e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -825,7 +825,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 		}
 	}
 
-	gk20a_mm_fb_flush(g);
+	g->ops.mm.fb_flush(g);
 
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
 		 ch_ctx->zcull_ctx.ctx_sw_mode);
@@ -7077,4 +7077,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
 	gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
 	gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask;
+	gops->gr.free_channel_ctx = gk20a_free_channel_ctx;
+	gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx;
+	gops->gr.free_obj_ctx = gk20a_free_obj_ctx;
+	gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull;
+	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 654938b2..3feb675b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -88,7 +88,6 @@ static inline u32 lo32(u64 f)
 	return (u32)(f & 0xffffffff);
 }
 
-static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
 static struct mapped_buffer_node *find_mapped_buffer_locked(
 					struct rb_root *root, u64 addr);
 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
@@ -100,7 +99,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
-static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
 
@@ -335,6 +333,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 	gk20a_init_bar1_vm(mm);
 	gk20a_init_system_vm(mm);
 
+	/* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
+	g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
 	mm->remove_support = gk20a_remove_mm_support;
 	mm->sw_ready = true;
 
@@ -833,9 +833,9 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
 	mutex_unlock(&vm->update_gmmu_lock);
 }
 
-static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
+u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 			     u64 size,
 			     enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
 	struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
@@ -881,9 +881,9 @@ static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 	return offset;
 }
 
-static int gk20a_vm_free_va(struct vm_gk20a *vm,
+int gk20a_vm_free_va(struct vm_gk20a *vm,
 			     u64 offset, u64 size,
 			     enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
 	u32 page_size = gmmu_page_sizes[pgsz_idx];
@@ -1100,21 +1100,32 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 	return 0;
 }
 
-static u64 __locked_gmmu_map(struct vm_gk20a *vm,
+u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 				u64 map_offset,
 				struct sg_table *sgt,
 				u64 buffer_offset,
 				u64 size,
 				int pgsz_idx,
 				u8 kind_v,
 				u32 ctag_offset,
 				u32 flags,
-				int rw_flag)
+				int rw_flag,
+				bool clear_ctags)
 {
 	int err = 0, i = 0;
 	bool allocated = false;
 	u32 pde_lo, pde_hi;
 	struct device *d = dev_from_vm(vm);
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	if (clear_ctags && ctag_offset) {
+		u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >>
+				COMP_TAG_LINE_SIZE_SHIFT;
+
+		/* init/clear the ctag buffer */
+		g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+				ctag_offset, ctag_offset + ctag_lines - 1);
+	}
 
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
@@ -1167,12 +1178,12 @@ fail_alloc:
 	return 0;
 }
 
-static void __locked_gmmu_unmap(struct vm_gk20a *vm,
+void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 				u64 vaddr,
 				u64 size,
 				int pgsz_idx,
 				bool va_allocated,
 				int rw_flag)
 {
 	int err = 0;
 	struct gk20a *g = gk20a_from_vm(vm);
@@ -1298,6 +1309,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	struct buffer_attrs bfr = {0};
 	struct gk20a_comptags comptags;
 	u64 buf_addr;
+	bool clear_ctags = false;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1402,11 +1414,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			bfr.kind_v = bfr.uc_kind_v;
 		} else {
 			gk20a_get_comptags(d, dmabuf, &comptags);
-
-			/* init/clear the ctag buffer */
-			g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-					    comptags.offset,
-					    comptags.offset + comptags.lines - 1);
+			clear_ctags = true;
 		}
 	}
 
@@ -1414,15 +1422,15 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		bfr.ctag_offset = comptags.offset;
 
 	/* update gmmu ptes */
-	map_offset = __locked_gmmu_map(vm, map_offset,
+	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
 					bfr.sgt,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
 					bfr.kind_v,
 					bfr.ctag_offset,
-					flags, rw_flag);
-
+					flags, rw_flag,
+					clear_ctags);
 	if (!map_offset)
 		goto clean_up;
 
@@ -1531,17 +1539,18 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		u32 flags,
 		int rw_flag)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
 
 	mutex_lock(&vm->update_gmmu_lock);
-	vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
+	vaddr = g->ops.mm.gmmu_map(vm, 0, /* already mapped? - No */
 				*sgt, /* sg table */
 				0, /* sg offset */
 				size,
 				0, /* page size index = 0 i.e. SZ_4K */
 				0, /* kind */
 				0, /* ctag_offset */
-				flags, rw_flag);
+				flags, rw_flag, false);
 	mutex_unlock(&vm->update_gmmu_lock);
 	if (!vaddr) {
 		gk20a_err(dev_from_vm(vm), "failed to allocate va space");
@@ -1549,7 +1558,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 	}
 
 	/* Invalidate kernel mappings immediately */
-	gk20a_mm_tlb_invalidate(vm);
+	g->ops.mm.tlb_invalidate(vm);
 
 	return vaddr;
 }
@@ -1573,8 +1582,10 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 size,
 		int rw_flag)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
+
 	mutex_lock(&vm->update_gmmu_lock);
-	__locked_gmmu_unmap(vm,
+	g->ops.mm.gmmu_unmap(vm,
 			vaddr,
 			size,
 			0, /* page size 4K */
@@ -1970,10 +1981,10 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 	}
 
 	for (i = 0; i < num_pages; i++) {
-		u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
+		u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr,
 			vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
 			NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
-			gk20a_mem_flag_none);
+			gk20a_mem_flag_none, false);
 
 		if (!page_vaddr) {
 			gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
@@ -1990,7 +2001,7 @@ err_unmap:
 	/* something went wrong. unmap pages */
 	while (i--) {
 		vaddr -= pgsz;
-		__locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
+		g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
 				    gk20a_mem_flag_none);
 	}
 
@@ -2005,12 +2016,14 @@ static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
 			       u64 size, u32 pgsz_idx) {
-	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
-			false, gk20a_mem_flag_none);
+	struct gk20a *g = vm->mm->g;
+
+	g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx,
+			false, gk20a_mem_flag_none);
 }
 
 /* NOTE! mapped_buffers lock must be held */
-static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
@@ -2026,7 +2039,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 		if (g->ops.mm.put_empty) {
 			g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
 		} else {
-			__locked_gmmu_unmap(vm,
+			g->ops.mm.gmmu_unmap(vm,
 				mapped_buffer->addr,
 				mapped_buffer->size,
 				mapped_buffer->pgsz_idx,
@@ -2036,7 +2049,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 				num_pages, pgsz_idx, false);
 		}
 	} else
-		__locked_gmmu_unmap(vm,
+		g->ops.mm.gmmu_unmap(vm,
 				mapped_buffer->addr,
 				mapped_buffer->size,
 				mapped_buffer->pgsz_idx,
@@ -2085,7 +2098,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 	mutex_unlock(&vm->update_gmmu_lock);
 }
 
-static void gk20a_vm_remove_support(struct vm_gk20a *vm)
+void gk20a_vm_remove_support(struct vm_gk20a *vm)
 {
 	struct gk20a *g = vm->mm->g;
 	struct mapped_buffer_node *mapped_buffer;
@@ -2156,7 +2169,8 @@ static void gk20a_vm_remove_support(struct vm_gk20a *vm)
 static void gk20a_vm_remove_support_kref(struct kref *ref)
 {
 	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
-	gk20a_vm_remove_support(vm);
+	struct gk20a *g = gk20a_from_vm(vm);
+	g->ops.mm.vm_remove(vm);
 }
 
 void gk20a_vm_get(struct vm_gk20a *vm)
@@ -3124,5 +3138,14 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.put_empty = gk20a_vm_put_empty;
 	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
 	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
+	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
+	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
+	gops->mm.vm_remove = gk20a_vm_remove_support;
+	gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
+	gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
+	gops->mm.fb_flush = gk20a_mm_fb_flush;
+	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
+	gops->mm.l2_flush = gk20a_mm_l2_flush;
+	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b8726c62..f06c465a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -318,6 +318,10 @@ struct vm_gk20a {
 	dma_addr_t zero_page_iova;
 	void *zero_page_cpuva;
 	struct sg_table *zero_page_sgt;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	u64 handle;
+#endif
 };
 
 struct gk20a;
@@ -438,11 +442,30 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		u32 flags,
 		int rw_flag);
 
+u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
+			u64 map_offset,
+			struct sg_table *sgt,
+			u64 buffer_offset,
+			u64 size,
+			int pgsz_idx,
+			u8 kind_v,
+			u32 ctag_offset,
+			u32 flags,
+			int rw_flag,
+			bool clear_ctags);
+
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 vaddr,
 		u64 size,
 		int rw_flag);
 
+void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
+			u64 vaddr,
+			u64 size,
+			int pgsz_idx,
+			bool va_allocated,
+			int rw_flag);
+
 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
 		struct sg_table *sgt);
@@ -461,6 +484,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+
 /* get reference to all currently mapped buffers */
 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 			 struct mapped_buffer_node ***mapped_buffers,
@@ -482,6 +507,16 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 void gk20a_vm_get(struct vm_gk20a *vm);
 void gk20a_vm_put(struct vm_gk20a *vm);
 
+void gk20a_vm_remove_support(struct vm_gk20a *vm);
+
+u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
+		u64 size,
+		enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
+
+int gk20a_vm_free_va(struct vm_gk20a *vm,
+		u64 offset, u64 size,
+		enum gmmu_pgsz_gk20a pgsz_idx);
+
 /* vm-as interface */
 struct nvhost_as_alloc_space_args;
 struct nvhost_as_free_space_args;
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index 6dd0c0db..e6ed9898 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -151,6 +151,12 @@ struct gk20a_platform {
 	 * of the CPU.
 	 */
 	void (*dump_platform_dependencies)(struct platform_device *dev);
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	bool virtual_dev;
+	u64 virt_handle;
+	struct task_struct *intr_handler;
+#endif
 };
 
 static inline struct gk20a_platform *gk20a_get_platform(
@@ -163,6 +169,9 @@ extern struct gk20a_platform gk20a_generic_platform;
 #ifdef CONFIG_TEGRA_GK20A
 extern struct gk20a_platform gk20a_tegra_platform;
 extern struct gk20a_platform gm20b_tegra_platform;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+extern struct gk20a_platform vgpu_tegra_platform;
+#endif
 #endif
 
 static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev)
diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
new file mode 100644
index 00000000..ea4fde79
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
@@ -0,0 +1,64 @@
+/*
+ * Tegra Virtualized GPU Platform Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/of_platform.h>
+
+#include "gk20a.h"
+#include "hal_gk20a.h"
+#include "platform_gk20a.h"
+
+static int gk20a_tegra_probe(struct platform_device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct device_node *np = dev->dev.of_node;
+	const __be32 *host1x_ptr;
+	struct platform_device *host1x_pdev = NULL;
+
+	host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
+	if (host1x_ptr) {
+		struct device_node *host1x_node =
+			of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
+
+		host1x_pdev = of_find_device_by_node(host1x_node);
+		if (!host1x_pdev) {
+			dev_warn(&dev->dev, "host1x device not available");
+			return -EPROBE_DEFER;
+		}
+
+	} else {
+		host1x_pdev = to_platform_device(dev->dev.parent);
+		dev_warn(&dev->dev, "host1x reference not found. assuming host1x to be parent");
+	}
+
+	platform->g->host1x_dev = host1x_pdev;
+
+	return 0;
+}
+
+struct gk20a_platform vgpu_tegra_platform = {
+	.has_syncpoints = true,
+
+	/* power management configuration */
+	.can_railgate = false,
+	.enable_slcg = false,
+	.enable_blcg = false,
+	.enable_elcg = false,
+	.enable_elpg = false,
+	.enable_aelpg = false,
+
+	.probe = gk20a_tegra_probe,
+
+	.virtual_dev = true,
+};
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 7e580136..86d049cf 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -102,5 +102,14 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
 void gm20b_init_fifo(struct gpu_ops *gops)
 {
 	gops->fifo.bind_channel = channel_gm20b_bind;
+	gops->fifo.unbind_channel = channel_gk20a_unbind;
+	gops->fifo.disable_channel = channel_gk20a_disable;
+	gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
+	gops->fifo.free_inst = channel_gk20a_free_inst;
+	gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
+
+	gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
+	gops->fifo.update_runlist = gk20a_fifo_update_runlist;
 	gops->fifo.trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault;
+	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 72500b0e..c9c32b9f 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -751,4 +751,9 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
 #endif
 	gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask;
+	gops->gr.free_channel_ctx = gk20a_free_channel_ctx;
+	gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx;
+	gops->gr.free_obj_ctx = gk20a_free_obj_ctx;
+	gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull;
+	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index ac82d56a..ed5b5e0d 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -327,4 +327,13 @@ void gm20b_init_mm(struct gpu_ops *gops)
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
 	gops->mm.clear_sparse = gm20b_vm_clear_sparse;
 	gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled;
+	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
+	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
+	gops->mm.vm_remove = gk20a_vm_remove_support;
+	gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
+	gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
+	gops->mm.fb_flush = gk20a_mm_fb_flush;
+	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
+	gops->mm.l2_flush = gk20a_mm_l2_flush;
+	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/Makefile b/drivers/gpu/nvgpu/vgpu/Makefile
new file mode 100644
index 00000000..edad7171
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/Makefile
@@ -0,0 +1,10 @@
+GCOV_PROFILE := y
+ccflags-y += -Idrivers/gpu/nvgpu
+ccflags-y += -Wno-multichar
+
+obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) = \
+	ltc_vgpu.o \
+	gr_vgpu.o \
+	fifo_vgpu.o \
+	mm_vgpu.o \
+	vgpu.o
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
new file mode 100644
index 00000000..23dec1f3
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -0,0 +1,569 @@
+/*
+ * Virtualized GPU Fifo
+ *
+ * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-mapping.h>
+#include "vgpu/vgpu.h"
+#include "gk20a/hw_fifo_gk20a.h"
+#include "gk20a/hw_ram_gk20a.h"
+
+static void vgpu_channel_bind(struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_config_params *p =
+			&msg.params.channel_config;
+	int err;
+
+	gk20a_dbg_info("bind channel %d", ch->hw_chid);
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	ch->bound = true;
+}
+
+static void vgpu_channel_unbind(struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+
+	gk20a_dbg_fn("");
+
+	if (ch->bound) {
+		struct tegra_vgpu_cmd_msg msg;
+		struct tegra_vgpu_channel_config_params *p =
+				&msg.params.channel_config;
+		int err;
+
+		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND;
+		msg.handle = platform->virt_handle;
+		p->handle = ch->virt_ctx;
+		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+		WARN_ON(err || msg.ret);
+	}
+
+	ch->bound = false;
+
+	/*
+	 * if we are agrressive then we can destroy the syncpt
+	 * resource at this point
+	 * if not, then it will be destroyed at channel_free()
+	 */
+	if (ch->sync && ch->sync->aggressive_destroy) {
+		ch->sync->destroy(ch->sync);
+		ch->sync = NULL;
+	}
+}
+
+static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX;
+	msg.handle = platform->virt_handle;
+	p->id = ch->hw_chid;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (err || msg.ret) {
+		gk20a_err(dev_from_gk20a(g), "fail");
+		return -ENOMEM;
+	}
+
+	ch->virt_ctx = p->handle;
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+}
+
+static void vgpu_channel_disable(struct channel_gk20a *ch)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_config_params *p =
+			&msg.params.channel_config;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+}
+
+static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
+				u32 gpfifo_entries)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+	struct device __maybe_unused *d = dev_from_gk20a(ch->g);
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+	p->gpfifo_va = gpfifo_base;
+	p->num_entries = gpfifo_entries;
+	p->userd_addr = ch->userd_iova;
+	p->iova = mapping ? 1 : 0;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+	return (err || msg.ret) ? -ENOMEM : 0;
+}
+
+static int init_engine_info(struct fifo_gk20a *f)
+{
+	struct fifo_engine_info_gk20a *gr_info;
+	const u32 gr_sw_id = ENGINE_GR_GK20A;
+
+	gk20a_dbg_fn("");
+
+	/* all we really care about finding is the graphics entry */
+	/* especially early on in sim it probably thinks it has more */
+	f->num_engines = 1;
+
+	gr_info = f->engine_info + gr_sw_id;
+
+	gr_info->sw_id = gr_sw_id;
+	gr_info->name = "gr";
+	/* FIXME: retrieve this from server */
+	gr_info->runlist_id = 0;
+	return 0;
+}
+
+static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
+{
+	struct fifo_engine_info_gk20a *engine_info;
+	struct fifo_runlist_info_gk20a *runlist;
+	struct device *d = dev_from_gk20a(g);
+	u32 runlist_id;
+	u32 i;
+	u64 runlist_size;
+
+	gk20a_dbg_fn("");
+
+	f->max_runlists = fifo_eng_runlist_base__size_1_v();
+	f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
+				  f->max_runlists, GFP_KERNEL);
+	if (!f->runlist_info)
+		goto clean_up;
+
+	engine_info = f->engine_info + ENGINE_GR_GK20A;
+	runlist_id = engine_info->runlist_id;
+	runlist = &f->runlist_info[runlist_id];
+
+	runlist->active_channels =
+		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
+			GFP_KERNEL);
+	if (!runlist->active_channels)
+		goto clean_up_runlist_info;
+
+	runlist_size = sizeof(u16) * f->num_channels;
+	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+		dma_addr_t iova;
202
203 runlist->mem[i].cpuva =
204 dma_alloc_coherent(d,
205 runlist_size,
206 &iova,
207 GFP_KERNEL);
208 if (!runlist->mem[i].cpuva) {
209 dev_err(d, "memory allocation failed\n");
210 goto clean_up_runlist;
211 }
212 runlist->mem[i].iova = iova;
213 runlist->mem[i].size = runlist_size;
214 }
215 mutex_init(&runlist->mutex);
216 init_waitqueue_head(&runlist->runlist_wq);
217
218	/* None of the buffers is pinned if this value doesn't change.
219 Otherwise, one of them (cur_buffer) must have been pinned. */
220 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
221
222 gk20a_dbg_fn("done");
223 return 0;
224
225clean_up_runlist:
226 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
227 if (runlist->mem[i].cpuva)
228 dma_free_coherent(d,
229 runlist->mem[i].size,
230 runlist->mem[i].cpuva,
231 runlist->mem[i].iova);
232 runlist->mem[i].cpuva = NULL;
233 runlist->mem[i].iova = 0;
234 }
235
236 kfree(runlist->active_channels);
237 runlist->active_channels = NULL;
238
239clean_up_runlist_info:
240 kfree(f->runlist_info);
241 f->runlist_info = NULL;
242
243clean_up:
244 gk20a_dbg_fn("fail");
245 return -ENOMEM;
246}
247
248static int vgpu_init_fifo_setup_sw(struct gk20a *g)
249{
250 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
251 struct fifo_gk20a *f = &g->fifo;
252 struct device *d = dev_from_gk20a(g);
253 int chid, err = 0;
254 dma_addr_t iova;
255
256 gk20a_dbg_fn("");
257
258 if (f->sw_ready) {
259 gk20a_dbg_fn("skip init");
260 return 0;
261 }
262
263 f->g = g;
264
265 err = vgpu_get_attribute(platform->virt_handle,
266 TEGRA_VGPU_ATTRIB_NUM_CHANNELS,
267 &f->num_channels);
268 if (err)
269 return -ENXIO;
270
271 f->max_engines = ENGINE_INVAL_GK20A;
272
273 f->userd_entry_size = 1 << ram_userd_base_shift_v();
274 f->userd_total_size = f->userd_entry_size * f->num_channels;
275
276 f->userd.cpuva = dma_alloc_coherent(d,
277 f->userd_total_size,
278 &iova,
279 GFP_KERNEL);
280 if (!f->userd.cpuva) {
281 dev_err(d, "memory allocation failed\n");
282 goto clean_up;
283 }
284
285 f->userd.iova = iova;
286 err = gk20a_get_sgtable(d, &f->userd.sgt,
287 f->userd.cpuva, f->userd.iova,
288 f->userd_total_size);
289 if (err) {
290 dev_err(d, "failed to create sg table\n");
291 goto clean_up;
292 }
293
294 /* bar1 va */
295 f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
296 if (!f->userd.gpu_va) {
297 dev_err(d, "gmmu mapping failed\n");
298 goto clean_up;
299 }
300
301 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
302
303 f->userd.size = f->userd_total_size;
304
305 f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
306 GFP_KERNEL);
307 f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
308 GFP_KERNEL);
309
310 if (!(f->channel && f->engine_info)) {
311 err = -ENOMEM;
312 goto clean_up;
313 }
314
315 init_engine_info(f);
316
317 init_runlist(g, f);
318
319 for (chid = 0; chid < f->num_channels; chid++) {
320 f->channel[chid].userd_cpu_va =
321 f->userd.cpuva + chid * f->userd_entry_size;
322 f->channel[chid].userd_iova =
323 NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
324 + chid * f->userd_entry_size;
325 f->channel[chid].userd_gpu_va =
326 f->userd.gpu_va + chid * f->userd_entry_size;
327
328 gk20a_init_channel_support(g, chid);
329 }
330 mutex_init(&f->ch_inuse_mutex);
331
332 f->deferred_reset_pending = false;
333 mutex_init(&f->deferred_reset_mutex);
334
335 f->sw_ready = true;
336
337 gk20a_dbg_fn("done");
338 return 0;
339
340clean_up:
341 gk20a_dbg_fn("fail");
342 /* FIXME: unmap from bar1 */
343 if (f->userd.sgt)
344 gk20a_free_sgtable(&f->userd.sgt);
345 if (f->userd.cpuva)
346 dma_free_coherent(d,
347 f->userd_total_size,
348 f->userd.cpuva,
349 f->userd.iova);
350 f->userd.cpuva = NULL;
351 f->userd.iova = 0;
352
353 memset(&f->userd, 0, sizeof(struct userd_desc));
354
355 kfree(f->channel);
356 f->channel = NULL;
357 kfree(f->engine_info);
358 f->engine_info = NULL;
359
360 return err;
361}
362
363static int vgpu_init_fifo_setup_hw(struct gk20a *g)
364{
365 gk20a_dbg_fn("");
366
367 /* test write, read through bar1 @ userd region before
368 * turning on the snooping */
369 {
370 struct fifo_gk20a *f = &g->fifo;
371 u32 v, v1 = 0x33, v2 = 0x55;
372
373 u32 bar1_vaddr = f->userd.gpu_va;
374 volatile u32 *cpu_vaddr = f->userd.cpuva;
375
376 gk20a_dbg_info("test bar1 @ vaddr 0x%x",
377 bar1_vaddr);
378
379 v = gk20a_bar1_readl(g, bar1_vaddr);
380
381 *cpu_vaddr = v1;
382 smp_mb();
383
384 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
385 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
386 return -EINVAL;
387 }
388
389 gk20a_bar1_writel(g, bar1_vaddr, v2);
390
391 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
392 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
393 return -EINVAL;
394 }
395
396 /* is it visible to the cpu? */
397 if (*cpu_vaddr != v2) {
398 gk20a_err(dev_from_gk20a(g),
399 "cpu didn't see bar1 write @ %p!",
400 cpu_vaddr);
401 }
402
403 /* put it back */
404 gk20a_bar1_writel(g, bar1_vaddr, v);
405 }
406
407 gk20a_dbg_fn("done");
408
409 return 0;
410}
411
412int vgpu_init_fifo_support(struct gk20a *g)
413{
414	int err;
415
416 gk20a_dbg_fn("");
417
418 err = vgpu_init_fifo_setup_sw(g);
419 if (err)
420 return err;
421
422 err = vgpu_init_fifo_setup_hw(g);
423 return err;
424}
425
426static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
427{
428 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
429 struct fifo_gk20a *f = &g->fifo;
430 struct tegra_vgpu_cmd_msg msg;
431 struct tegra_vgpu_channel_config_params *p =
432 &msg.params.channel_config;
433 int err;
434
435 gk20a_dbg_fn("");
436
437 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
438 msg.handle = platform->virt_handle;
439 p->handle = f->channel[hw_chid].virt_ctx;
440 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
441
442 if (err || msg.ret) {
443 gk20a_err(dev_from_gk20a(g),
444 "preempt channel %d failed\n", hw_chid);
445 err = -ENOMEM;
446 }
447
448 return err;
449}
450
451static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist,
452 u32 num_entries)
453{
454 struct tegra_vgpu_cmd_msg *msg;
455 struct tegra_vgpu_runlist_params *p;
456 size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries;
457 char *ptr;
458 int err;
459
460 msg = kmalloc(size, GFP_KERNEL);
461 if (!msg)
462 return -1;
463
464 msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
465 msg->handle = handle;
466 p = &msg->params.runlist;
467 p->runlist_id = runlist_id;
468 p->num_entries = num_entries;
469
470 ptr = (char *)msg + sizeof(*msg);
471 memcpy(ptr, runlist, sizeof(*runlist) * num_entries);
472 err = vgpu_comm_sendrecv(msg, size, sizeof(*msg));
473
474 err = (err || msg->ret) ? -1 : 0;
475 kfree(msg);
476 return err;
477}
478
479static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
480 u32 hw_chid, bool add,
481 bool wait_for_finish)
482{
483 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
484 struct fifo_gk20a *f = &g->fifo;
485 struct fifo_runlist_info_gk20a *runlist;
486 u16 *runlist_entry = NULL;
487 u32 count = 0;
488
489 gk20a_dbg_fn("");
490
491 runlist = &f->runlist_info[runlist_id];
492
493 /* valid channel, add/remove it from active list.
494 Otherwise, keep active list untouched for suspend/resume. */
495 if (hw_chid != ~0) {
496 if (add) {
497 if (test_and_set_bit(hw_chid,
498 runlist->active_channels) == 1)
499 return 0;
500 } else {
501 if (test_and_clear_bit(hw_chid,
502 runlist->active_channels) == 0)
503 return 0;
504 }
505 }
506
507 if (hw_chid != ~0 || /* add/remove a valid channel */
508 add /* resume to add all channels back */) {
509 u32 chid;
510
511 runlist_entry = runlist->mem[0].cpuva;
512 for_each_set_bit(chid,
513 runlist->active_channels, f->num_channels) {
514 gk20a_dbg_info("add channel %d to runlist", chid);
515 runlist_entry[0] = chid;
516 runlist_entry++;
517 count++;
518 }
519 } else /* suspend to remove all channels */
520 count = 0;
521
522 return vgpu_submit_runlist(platform->virt_handle, runlist_id,
523 runlist->mem[0].cpuva, count);
524}
525
526/* add/remove a channel from runlist
527 special cases below: runlist->active_channels will NOT be changed.
528 (hw_chid == ~0 && !add) means remove all active channels from runlist.
529 (hw_chid == ~0 && add) means restore all active channels on runlist. */
530static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
531 u32 hw_chid, bool add, bool wait_for_finish)
532{
533 struct fifo_runlist_info_gk20a *runlist = NULL;
534 struct fifo_gk20a *f = &g->fifo;
535 u32 ret = 0;
536
537 gk20a_dbg_fn("");
538
539 runlist = &f->runlist_info[runlist_id];
540
541 mutex_lock(&runlist->mutex);
542
543 ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
544 wait_for_finish);
545
546 mutex_unlock(&runlist->mutex);
547 return ret;
548}
549
550static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
551{
552 gk20a_dbg_fn("");
553
554 return 0;
555}
556
557void vgpu_init_fifo_ops(struct gpu_ops *gops)
558{
559 gops->fifo.bind_channel = vgpu_channel_bind;
560 gops->fifo.unbind_channel = vgpu_channel_unbind;
561 gops->fifo.disable_channel = vgpu_channel_disable;
562 gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
563 gops->fifo.free_inst = vgpu_channel_free_inst;
564 gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
565 gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
566 gops->fifo.update_runlist = vgpu_fifo_update_runlist;
567 gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
568}
569
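Every channel op in fifo_vgpu.c above follows the same shape: fill a tegra_vgpu_cmd_msg with the guest's virt_handle and the channel's server-side virt_ctx, do a synchronous vgpu_comm_sendrecv(), and treat either a transport error or a non-zero msg.ret as failure. The sketch below factors that shape into a single helper purely to make the pattern explicit; vgpu_channel_simple_cmd() is a hypothetical name and is not part of this patch.

#include "vgpu/vgpu.h"

static int vgpu_channel_simple_cmd(struct channel_gk20a *ch, u32 cmd)
{
	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_channel_config_params *p =
			&msg.params.channel_config;
	int err;

	/* every request names the guest GPU instance and the channel's
	 * server-side context handle obtained at ALLOC_HWCTX time */
	msg.cmd = cmd;
	msg.handle = platform->virt_handle;
	p->handle = ch->virt_ctx;

	/* synchronous round trip; the reply is copied back over msg */
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	/* both the transport result and the server's msg.ret must be zero */
	return (err || msg.ret) ? -EIO : 0;
}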
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
new file mode 100644
index 00000000..a7e966da
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -0,0 +1,687 @@
1/*
2 * Virtualized GPU Graphics
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include "vgpu/vgpu.h"
17#include "gk20a/hw_gr_gk20a.h"
18
19static int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va)
20{
21 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
22 struct tegra_vgpu_cmd_msg msg;
23 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
24 int err;
25
26 gk20a_dbg_fn("");
27
28 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX;
29 msg.handle = platform->virt_handle;
30 p->handle = c->virt_ctx;
31 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
32
33 return (err || msg.ret) ? -1 : 0;
34}
35
36static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g,
37 struct channel_gk20a *c, bool patch)
38{
39 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
40 struct tegra_vgpu_cmd_msg msg;
41 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
42 int err;
43
44 gk20a_dbg_fn("");
45
46 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX;
47 msg.handle = platform->virt_handle;
48 p->handle = c->virt_ctx;
49 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
50
51 return (err || msg.ret) ? -1 : 0;
52}
53
54/* load a saved fresh copy of the golden image into the channel gr_ctx */
55static int vgpu_gr_load_golden_ctx_image(struct gk20a *g,
56 struct channel_gk20a *c)
57{
58 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
59 struct tegra_vgpu_cmd_msg msg;
60 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
61 int err;
62
63 gk20a_dbg_fn("");
64
65 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX;
66 msg.handle = platform->virt_handle;
67 p->handle = c->virt_ctx;
68 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
69
70 return (err || msg.ret) ? -1 : 0;
71}
72
73static int vgpu_gr_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
74{
75 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
76
77 gk20a_dbg_fn("");
78
79 vgpu_get_attribute(platform->virt_handle,
80 TEGRA_VGPU_ATTRIB_GOLDEN_CTX_SIZE,
81 &g->gr.ctx_vars.golden_image_size);
82 vgpu_get_attribute(platform->virt_handle,
83 TEGRA_VGPU_ATTRIB_ZCULL_CTX_SIZE,
84 &g->gr.ctx_vars.zcull_ctxsw_image_size);
85 if (!g->gr.ctx_vars.golden_image_size ||
86 !g->gr.ctx_vars.zcull_ctxsw_image_size)
87 return -ENXIO;
88
89 gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size;
90 g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
91 return 0;
92}
93
94static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g)
95{
96 struct gr_gk20a *gr = &g->gr;
97 int attr_buffer_size;
98
99 u32 cb_buffer_size = gr->bundle_cb_default_size *
100 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
101
102 u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
103 gr_scc_pagepool_total_pages_byte_granularity_v();
104
105 gk20a_dbg_fn("");
106
107 attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
108
109 gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
110 gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;
111
112 gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
113 gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;
114
115 gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
116 gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;
117
118 gk20a_dbg_info("priv access map size : %d",
119 gr->ctx_vars.priv_access_map_size);
120 gr->global_ctx_buffer[PRIV_ACCESS_MAP].size =
121 gr->ctx_vars.priv_access_map_size;
122
123 return 0;
124}
125
126static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
127 struct channel_gk20a *c)
128{
129 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
130 struct tegra_vgpu_cmd_msg msg;
131 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
132 struct vm_gk20a *ch_vm = c->vm;
133 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
134 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
135 struct gr_gk20a *gr = &g->gr;
136 u64 gpu_va;
137 u32 i;
138 int err;
139
140 gk20a_dbg_fn("");
141
142 /* FIXME: add VPR support */
143
144 /* Circular Buffer */
145 gpu_va = gk20a_vm_alloc_va(ch_vm,
146 gr->global_ctx_buffer[CIRCULAR].size, 0);
147
148 if (!gpu_va)
149 goto clean_up;
150 g_bfr_va[CIRCULAR_VA] = gpu_va;
151 g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].size;
152
153 /* Attribute Buffer */
154 gpu_va = gk20a_vm_alloc_va(ch_vm,
155 gr->global_ctx_buffer[ATTRIBUTE].size, 0);
156
157 if (!gpu_va)
158 goto clean_up;
159 g_bfr_va[ATTRIBUTE_VA] = gpu_va;
160 g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].size;
161
162 /* Page Pool */
163 gpu_va = gk20a_vm_alloc_va(ch_vm,
164 gr->global_ctx_buffer[PAGEPOOL].size, 0);
165 if (!gpu_va)
166 goto clean_up;
167 g_bfr_va[PAGEPOOL_VA] = gpu_va;
168 g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].size;
169
170 /* Priv register Access Map */
171 gpu_va = gk20a_vm_alloc_va(ch_vm,
172 gr->global_ctx_buffer[PRIV_ACCESS_MAP].size, 0);
173 if (!gpu_va)
174 goto clean_up;
175 g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
176 g_bfr_size[PRIV_ACCESS_MAP_VA] =
177 gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;
178
179 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX;
180 msg.handle = platform->virt_handle;
181 p->handle = c->virt_ctx;
182 p->cb_va = g_bfr_va[CIRCULAR_VA];
183 p->attr_va = g_bfr_va[ATTRIBUTE_VA];
184 p->page_pool_va = g_bfr_va[PAGEPOOL_VA];
185 p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA];
186 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
187 if (err || msg.ret)
188 goto clean_up;
189
190 c->ch_ctx.global_ctx_buffer_mapped = true;
191 return 0;
192
193 clean_up:
194 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
195 if (g_bfr_va[i]) {
196 gk20a_vm_free_va(ch_vm, g_bfr_va[i],
197 g_bfr_size[i], 0);
198 g_bfr_va[i] = 0;
199 }
200 }
201 return -ENOMEM;
202}
203
204static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
205{
206 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
207 struct vm_gk20a *ch_vm = c->vm;
208 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
209 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
210 u32 i;
211
212 gk20a_dbg_fn("");
213
214 if (c->ch_ctx.global_ctx_buffer_mapped) {
215 struct tegra_vgpu_cmd_msg msg;
216 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
217 int err;
218
219 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX;
220 msg.handle = platform->virt_handle;
221 p->handle = c->virt_ctx;
222 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
223 WARN_ON(err || msg.ret);
224 }
225
226 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
227 if (g_bfr_va[i]) {
228 gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i], 0);
229 g_bfr_va[i] = 0;
230 g_bfr_size[i] = 0;
231 }
232 }
233 c->ch_ctx.global_ctx_buffer_mapped = false;
234}
235
236static int vgpu_gr_alloc_channel_gr_ctx(struct gk20a *g,
237 struct channel_gk20a *c)
238{
239 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
240 struct tegra_vgpu_cmd_msg msg;
241 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
242 struct gr_gk20a *gr = &g->gr;
243 struct gr_ctx_desc *gr_ctx;
244 struct vm_gk20a *ch_vm = c->vm;
245 int err;
246
247 gk20a_dbg_fn("");
248
249 if (gr->ctx_vars.buffer_size == 0)
250 return 0;
251
252 /* alloc channel gr ctx buffer */
253 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
254 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
255
256 gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
257 if (!gr_ctx)
258 return -ENOMEM;
259
260 gr_ctx->size = gr->ctx_vars.buffer_total_size;
261 gr_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, gr_ctx->size, 0);
262
263 if (!gr_ctx->gpu_va) {
264 kfree(gr_ctx);
265 return -ENOMEM;
266 }
267
268 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_CTX;
269 msg.handle = platform->virt_handle;
270 p->handle = c->virt_ctx;
271 p->gr_ctx_va = gr_ctx->gpu_va;
272 p->class_num = c->obj_class;
273 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
274
275 if (err || msg.ret) {
276 gk20a_vm_free_va(ch_vm, gr_ctx->gpu_va, gr_ctx->size, 0);
277 err = -ENOMEM;
278 } else
279 c->ch_ctx.gr_ctx = gr_ctx;
280
281 return err;
282}
283
284static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
285{
286 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
287 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
288 struct vm_gk20a *ch_vm = c->vm;
289
290 gk20a_dbg_fn("");
291
292 if (ch_ctx->gr_ctx && ch_ctx->gr_ctx->gpu_va) {
293 struct tegra_vgpu_cmd_msg msg;
294 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
295 int err;
296
297 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX;
298 msg.handle = platform->virt_handle;
299 p->handle = c->virt_ctx;
300 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
301 WARN_ON(err || msg.ret);
302
303 gk20a_vm_free_va(ch_vm, ch_ctx->gr_ctx->gpu_va,
304 ch_ctx->gr_ctx->size, 0);
305 ch_ctx->gr_ctx->gpu_va = 0;
306 kfree(ch_ctx->gr_ctx);
307 }
308}
309
310static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
311 struct channel_gk20a *c)
312{
313 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
314 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
315 struct vm_gk20a *ch_vm = c->vm;
316 struct tegra_vgpu_cmd_msg msg;
317 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
318 int err;
319
320 gk20a_dbg_fn("");
321
322 patch_ctx->size = 128 * sizeof(u32);
323 patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0);
324 if (!patch_ctx->gpu_va)
325 return -ENOMEM;
326
327 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX;
328 msg.handle = platform->virt_handle;
329 p->handle = c->virt_ctx;
330 p->patch_ctx_va = patch_ctx->gpu_va;
331 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
332 if (err || msg.ret) {
333 gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
334 err = -ENOMEM;
335 }
336
337 return err;
338}
339
340static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
341{
342 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
343 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
344 struct vm_gk20a *ch_vm = c->vm;
345
346 gk20a_dbg_fn("");
347
348 if (patch_ctx->gpu_va) {
349 struct tegra_vgpu_cmd_msg msg;
350 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
351 int err;
352
353 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX;
354 msg.handle = platform->virt_handle;
355 p->handle = c->virt_ctx;
356 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
357 WARN_ON(err || msg.ret);
358
359 gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
360 patch_ctx->gpu_va = 0;
361 }
362}
363
364static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
365{
366 gk20a_dbg_fn("");
367
368 vgpu_gr_unmap_global_ctx_buffers(c);
369 vgpu_gr_free_channel_patch_ctx(c);
370 if (!gk20a_is_channel_marked_as_tsg(c))
371 vgpu_gr_free_channel_gr_ctx(c);
372
373 /* zcull_ctx, pm_ctx */
374
375 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
376
377 c->num_objects = 0;
378 c->first_init = false;
379}
380
381static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
382 struct nvhost_alloc_obj_ctx_args *args)
383{
384 struct gk20a *g = c->g;
385 struct fifo_gk20a *f = &g->fifo;
386 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
387 struct tsg_gk20a *tsg = NULL;
388 int err = 0;
389
390 gk20a_dbg_fn("");
391
392 /* an address space needs to have been bound at this point.*/
393 if (!gk20a_channel_as_bound(c)) {
394 gk20a_err(dev_from_gk20a(g),
395 "not bound to address space at time"
396 " of grctx allocation");
397 return -EINVAL;
398 }
399
400 if (!g->ops.gr.is_valid_class(g, args->class_num)) {
401 gk20a_err(dev_from_gk20a(g),
402 "invalid obj class 0x%x", args->class_num);
403 err = -EINVAL;
404 goto out;
405 }
406 c->obj_class = args->class_num;
407
408 /* FIXME: add TSG support */
409 if (gk20a_is_channel_marked_as_tsg(c))
410 tsg = &f->tsg[c->tsgid];
411
412 /* allocate gr ctx buffer */
413 if (!ch_ctx->gr_ctx) {
414 err = vgpu_gr_alloc_channel_gr_ctx(g, c);
415 if (err) {
416 gk20a_err(dev_from_gk20a(g),
417 "fail to allocate gr ctx buffer");
418 goto out;
419 }
420 } else {
421		/* TBD: needs to be more subtle about which is
422		 * being allocated, as some are allowed to be
423		 * allocated along the same channel */
424 gk20a_err(dev_from_gk20a(g),
425 "too many classes alloc'd on same channel");
426 err = -EINVAL;
427 goto out;
428 }
429
430 /* commit gr ctx buffer */
431 err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
432 if (err) {
433 gk20a_err(dev_from_gk20a(g),
434 "fail to commit gr ctx buffer");
435 goto out;
436 }
437
438 /* allocate patch buffer */
439 if (ch_ctx->patch_ctx.pages == NULL) {
440 err = vgpu_gr_alloc_channel_patch_ctx(g, c);
441 if (err) {
442 gk20a_err(dev_from_gk20a(g),
443 "fail to allocate patch buffer");
444 goto out;
445 }
446 }
447
448 /* map global buffer to channel gpu_va and commit */
449 if (!ch_ctx->global_ctx_buffer_mapped) {
450 err = vgpu_gr_map_global_ctx_buffers(g, c);
451 if (err) {
452 gk20a_err(dev_from_gk20a(g),
453 "fail to map global ctx buffer");
454 goto out;
455 }
456 gr_gk20a_elpg_protected_call(g,
457 vgpu_gr_commit_global_ctx_buffers(g, c, true));
458 }
459
460 /* load golden image */
461 if (!c->first_init) {
462 err = gr_gk20a_elpg_protected_call(g,
463 vgpu_gr_load_golden_ctx_image(g, c));
464 if (err) {
465 gk20a_err(dev_from_gk20a(g),
466 "fail to load golden ctx image");
467 goto out;
468 }
469 c->first_init = true;
470 }
471
472 c->num_objects++;
473
474 gk20a_dbg_fn("done");
475 return 0;
476out:
477 /* 1. gr_ctx, patch_ctx and global ctx buffer mapping
478 can be reused so no need to release them.
479	   2. golden image load is a one-time thing, so if
480	   it passed, there is no need to undo it. */
481 gk20a_err(dev_from_gk20a(g), "fail");
482 return err;
483}
484
485static int vgpu_gr_free_obj_ctx(struct channel_gk20a *c,
486 struct nvhost_free_obj_ctx_args *args)
487{
488 unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);
489
490 gk20a_dbg_fn("");
491
492 if (c->num_objects == 0)
493 return 0;
494
495 c->num_objects--;
496
497 if (c->num_objects == 0) {
498 c->first_init = false;
499 gk20a_disable_channel(c,
500 !c->has_timedout,
501 timeout);
502 }
503
504 return 0;
505}
506
507static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
508{
509 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
510
511 gk20a_dbg_fn("");
512
513 if (vgpu_get_attribute(platform->virt_handle,
514 TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
515 return -ENOMEM;
516
517 if (vgpu_get_attribute(platform->virt_handle,
518 TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
519 &gr->max_tpc_per_gpc_count))
520 return -ENOMEM;
521
522 if (vgpu_get_attribute(platform->virt_handle,
523 TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
524 &gr->max_tpc_count))
525 return -ENOMEM;
526
527 g->ops.gr.bundle_cb_defaults(g);
528 g->ops.gr.cb_size_default(g);
529 g->ops.gr.calc_global_ctx_buffer_size(g);
530 return 0;
531}
532
533static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
534 struct channel_gk20a *c, u64 zcull_va,
535 u32 mode)
536{
537 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
538 struct tegra_vgpu_cmd_msg msg;
539 struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind;
540 int err;
541
542 gk20a_dbg_fn("");
543
544 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL;
545 msg.handle = platform->virt_handle;
546 p->handle = c->virt_ctx;
547 p->zcull_va = zcull_va;
548 p->mode = mode;
549 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
550
551 return (err || msg.ret) ? -ENOMEM : 0;
552}
553
554static int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
555 struct gr_zcull_info *zcull_params)
556{
557 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
558 struct tegra_vgpu_cmd_msg msg;
559 struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info;
560 int err;
561
562 gk20a_dbg_fn("");
563
564 msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO;
565 msg.handle = platform->virt_handle;
566 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
567 if (err || msg.ret)
568 return -ENOMEM;
569
570 zcull_params->width_align_pixels = p->width_align_pixels;
571 zcull_params->height_align_pixels = p->height_align_pixels;
572 zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots;
573 zcull_params->aliquot_total = p->aliquot_total;
574 zcull_params->region_byte_multiplier = p->region_byte_multiplier;
575 zcull_params->region_header_size = p->region_header_size;
576 zcull_params->subregion_header_size = p->subregion_header_size;
577 zcull_params->subregion_width_align_pixels =
578 p->subregion_width_align_pixels;
579 zcull_params->subregion_height_align_pixels =
580 p->subregion_height_align_pixels;
581 zcull_params->subregion_count = p->subregion_count;
582
583 return 0;
584}
585
586static void vgpu_remove_gr_support(struct gr_gk20a *gr)
587{
588 gk20a_dbg_fn("");
589
590 gk20a_allocator_destroy(&gr->comp_tags);
591}
592
593static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
594{
595 struct gr_gk20a *gr = &g->gr;
596 int err;
597
598 gk20a_dbg_fn("");
599
600 if (gr->sw_ready) {
601 gk20a_dbg_fn("skip init");
602 return 0;
603 }
604
605 gr->g = g;
606
607 err = vgpu_gr_init_gr_config(g, gr);
608 if (err)
609 goto clean_up;
610
611 err = vgpu_gr_init_ctx_state(g, gr);
612 if (err)
613 goto clean_up;
614
615 err = g->ops.ltc.init_comptags(g, gr);
616 if (err)
617 goto clean_up;
618
619 err = vgpu_gr_alloc_global_ctx_buffers(g);
620 if (err)
621 goto clean_up;
622
623 mutex_init(&gr->ctx_mutex);
624
625 gr->remove_support = vgpu_remove_gr_support;
626 gr->sw_ready = true;
627
628 gk20a_dbg_fn("done");
629 return 0;
630
631clean_up:
632 gk20a_err(dev_from_gk20a(g), "fail");
633 vgpu_remove_gr_support(gr);
634 return err;
635}
636
637int vgpu_init_gr_support(struct gk20a *g)
638{
639 gk20a_dbg_fn("");
640
641 return vgpu_gr_init_gr_setup_sw(g);
642}
643
644struct gr_isr_data {
645 u32 addr;
646 u32 data_lo;
647 u32 data_hi;
648 u32 curr_ctx;
649 u32 chid;
650 u32 offset;
651 u32 sub_chan;
652 u32 class_num;
653};
654
655static int vgpu_gr_handle_notify_pending(struct gk20a *g,
656 struct gr_isr_data *isr_data)
657{
658 struct fifo_gk20a *f = &g->fifo;
659 struct channel_gk20a *ch = &f->channel[isr_data->chid];
660
661 gk20a_dbg_fn("");
662 wake_up(&ch->notifier_wq);
663 return 0;
664}
665
666int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
667{
668 struct gr_isr_data isr_data;
669
670 gk20a_dbg_fn("");
671
672 isr_data.chid = info->chid;
673
674 if (info->type == TEGRA_VGPU_GR_INTR_NOTIFY)
675 vgpu_gr_handle_notify_pending(g, &isr_data);
676
677 return 0;
678}
679
680void vgpu_init_gr_ops(struct gpu_ops *gops)
681{
682 gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx;
683 gops->gr.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx;
684 gops->gr.free_obj_ctx = vgpu_gr_free_obj_ctx;
685 gops->gr.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull;
686 gops->gr.get_zcull_info = vgpu_gr_get_zcull_info;
687}
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
new file mode 100644
index 00000000..ddff23b7
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
@@ -0,0 +1,55 @@
1/*
2 * Virtualized GPU L2
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include "vgpu/vgpu.h"
17
18static int vgpu_determine_L2_size_bytes(struct gk20a *g)
19{
20 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
21 u32 cache_size = 0;
22
23 gk20a_dbg_fn("");
24
25 if (vgpu_get_attribute(platform->virt_handle,
26 TEGRA_VGPU_ATTRIB_L2_SIZE, &cache_size))
27 dev_err(dev_from_gk20a(g), "unable to get L2 size");
28
29 return cache_size;
30}
31
32static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
33{
34 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
35 u32 max_comptag_lines = 0;
36
37 gk20a_dbg_fn("");
38
39 vgpu_get_attribute(platform->virt_handle,
40 TEGRA_VGPU_ATTRIB_COMPTAG_LINES, &max_comptag_lines);
41 if (max_comptag_lines < 2)
42 return -ENXIO;
43
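	/* line 0 is deliberately left out of the allocator: a comptag
	 * offset of 0 stands for "uncompressed" in the rest of the driver,
	 * so only lines 1..max_comptag_lines-1 are handed out */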
44 gk20a_allocator_init(&gr->comp_tags, "comptag",
45 1, /* start */
46 max_comptag_lines - 1, /* length*/
47 1); /* align */
48 return 0;
49}
50
51void vgpu_init_ltc_ops(struct gpu_ops *gops)
52{
53 gops->ltc.determine_L2_size_bytes = vgpu_determine_L2_size_bytes;
54 gops->ltc.init_comptags = vgpu_ltc_init_comptags;
55}
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
new file mode 100644
index 00000000..6ed1dece
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -0,0 +1,425 @@
1/*
2 * Virtualized GPU Memory Management
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/dma-mapping.h>
17#include "vgpu/vgpu.h"
18
19/* note: keep the page sizes sorted lowest to highest here */
20static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
21static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
22
23static int vgpu_init_mm_setup_sw(struct gk20a *g)
24{
25 struct mm_gk20a *mm = &g->mm;
26
27 gk20a_dbg_fn("");
28
29 if (mm->sw_ready) {
30 gk20a_dbg_fn("skip init");
31 return 0;
32 }
33
34 mm->g = g;
35 mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
36 mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
37 mm->pde_stride = mm->big_page_size << 10;
38 mm->pde_stride_shift = ilog2(mm->pde_stride);
39 BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
40
41 /*TBD: make channel vm size configurable */
42 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
43
44 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
45
46 mm->sw_ready = true;
47
48 return 0;
49}
50
51int vgpu_init_mm_support(struct gk20a *g)
52{
53 gk20a_dbg_fn("");
54
55 return vgpu_init_mm_setup_sw(g);
56}
57
58static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
59 u64 map_offset,
60 struct sg_table *sgt,
61 u64 buffer_offset,
62 u64 size,
63 int pgsz_idx,
64 u8 kind_v,
65 u32 ctag_offset,
66 u32 flags,
67 int rw_flag,
68 bool clear_ctags)
69{
70 int err = 0;
71 struct device *d = dev_from_vm(vm);
72 struct gk20a *g = gk20a_from_vm(vm);
73 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
74 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
75 struct tegra_vgpu_cmd_msg msg;
76 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
77 u64 addr = gk20a_mm_iova_addr(sgt->sgl);
78 u8 prot;
79
80 gk20a_dbg_fn("");
81
82 /* Allocate (or validate when map_offset != 0) the virtual address. */
83 if (!map_offset) {
84 map_offset = gk20a_vm_alloc_va(vm, size,
85 pgsz_idx);
86 if (!map_offset) {
87 gk20a_err(d, "failed to allocate va space");
88 err = -ENOMEM;
89 goto fail;
90 }
91 }
92
93 if (rw_flag == gk20a_mem_flag_read_only)
94 prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
95 else if (rw_flag == gk20a_mem_flag_write_only)
96 prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
97 else
98 prot = TEGRA_VGPU_MAP_PROT_NONE;
99
100 msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
101 msg.handle = platform->virt_handle;
102 p->handle = vm->handle;
103 p->addr = addr;
104 p->gpu_va = map_offset;
105 p->size = size;
106 p->pgsz_idx = pgsz_idx;
107 p->iova = mapping ? 1 : 0;
108 p->kind = kind_v;
109 p->cacheable =
110 (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
111 p->prot = prot;
112 p->ctag_offset = ctag_offset;
113 p->clear_ctags = clear_ctags;
114 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
115 if (err || msg.ret)
116 goto fail;
117
118 vm->tlb_dirty = true;
119 return map_offset;
120fail:
121 gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
122 return 0;
123}
124
125static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
126 u64 vaddr,
127 u64 size,
128 int pgsz_idx,
129 bool va_allocated,
130 int rw_flag)
131{
132 struct gk20a *g = gk20a_from_vm(vm);
133 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
134 struct tegra_vgpu_cmd_msg msg;
135 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
136 int err;
137
138 gk20a_dbg_fn("");
139
140 if (va_allocated) {
141 err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
142 if (err) {
143 dev_err(dev_from_vm(vm),
144 "failed to free va");
145 return;
146 }
147 }
148
149 msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
150 msg.handle = platform->virt_handle;
151 p->handle = vm->handle;
152 p->gpu_va = vaddr;
153 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
154 if (err || msg.ret)
155 dev_err(dev_from_vm(vm),
156 "failed to update gmmu ptes on unmap");
157
158 vm->tlb_dirty = true;
159}
160
161static void vgpu_vm_remove_support(struct vm_gk20a *vm)
162{
163 struct gk20a *g = vm->mm->g;
164 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
165 struct mapped_buffer_node *mapped_buffer;
166 struct vm_reserved_va_node *va_node, *va_node_tmp;
167 struct tegra_vgpu_cmd_msg msg;
168 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
169 struct rb_node *node;
170 int err;
171
172 gk20a_dbg_fn("");
173 mutex_lock(&vm->update_gmmu_lock);
174
175 /* TBD: add a flag here for the unmap code to recognize teardown
176 * and short-circuit any otherwise expensive operations. */
177
178 node = rb_first(&vm->mapped_buffers);
179 while (node) {
180 mapped_buffer =
181 container_of(node, struct mapped_buffer_node, node);
182 gk20a_vm_unmap_locked(mapped_buffer);
183 node = rb_first(&vm->mapped_buffers);
184 }
185
186 /* destroy remaining reserved memory areas */
187 list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
188 reserved_va_list) {
189 list_del(&va_node->reserved_va_list);
190 kfree(va_node);
191 }
192
193 msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
194 msg.handle = platform->virt_handle;
195 p->handle = vm->handle;
196 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
197 WARN_ON(err || msg.ret);
198
199 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
200 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
201
202 mutex_unlock(&vm->update_gmmu_lock);
203
204 /* release zero page if used */
205 if (vm->zero_page_cpuva)
206 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
207 vm->zero_page_cpuva, vm->zero_page_iova);
208
209 /* vm is not used anymore. release it. */
210 kfree(vm);
211}
212
213u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
214{
215 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
216 struct dma_iommu_mapping *mapping =
217 to_dma_iommu_mapping(dev_from_gk20a(g));
218 u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
219 struct tegra_vgpu_cmd_msg msg;
220 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
221 int err;
222
223 msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
224 msg.handle = platform->virt_handle;
225 p->addr = addr;
226 p->size = size;
227 p->iova = mapping ? 1 : 0;
228 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
229 if (err || msg.ret)
230 addr = 0;
231 else
232 addr = p->gpu_va;
233
234 return addr;
235}
236
237/* address space interfaces for the gk20a module */
238static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share)
239{
240 struct gk20a_as *as = as_share->as;
241 struct gk20a *g = gk20a_from_as(as);
242 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
243 struct tegra_vgpu_cmd_msg msg;
244 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
245 struct mm_gk20a *mm = &g->mm;
246 struct vm_gk20a *vm;
247 u64 vma_size;
248 u32 num_pages, low_hole_pages;
249 char name[32];
250 int err;
251
252 gk20a_dbg_fn("");
253
254 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
255 if (!vm)
256 return -ENOMEM;
257
258 as_share->vm = vm;
259
260 vm->mm = mm;
261 vm->as_share = as_share;
262
263 vm->big_pages = true;
264
265 vm->va_start = mm->pde_stride; /* create a one pde hole */
266 vm->va_limit = mm->channel.size; /* note this means channel.size is
267 really just the max */
268
269 msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
270 msg.handle = platform->virt_handle;
271 p->size = vm->va_limit;
272 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
273 if (err || msg.ret)
274 return -ENOMEM;
275
276 vm->handle = p->handle;
277
278 /* low-half: alloc small pages */
279 /* high-half: alloc big pages */
280 vma_size = mm->channel.size >> 1;
281
282 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
283 gmmu_page_sizes[gmmu_page_size_small]>>10);
284 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
285
286 /* num_pages above is without regard to the low-side hole. */
287 low_hole_pages = (vm->va_start >>
288 gmmu_page_shifts[gmmu_page_size_small]);
289
290 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
291 low_hole_pages, /* start */
292 num_pages - low_hole_pages, /* length */
293 1); /* align */
294
295 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
296 gmmu_page_sizes[gmmu_page_size_big]>>10);
297
298 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
299 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
300 num_pages, /* start */
301 num_pages, /* length */
302 1); /* align */
303
304 vm->mapped_buffers = RB_ROOT;
305
306 mutex_init(&vm->update_gmmu_lock);
307 kref_init(&vm->ref);
308 INIT_LIST_HEAD(&vm->reserved_va_list);
309
310 vm->enable_ctag = true;
311
312 return 0;
313}
314
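/*
 * Worked example for the split set up in vgpu_vm_alloc_share() above
 * (symbolic values, not from this patch): with a channel VM of size S,
 * vma_size = S/2. The small-page allocator counts in 4K units and covers
 * [va_start, S/2), minus the one-PDE hole at the bottom. The big-page
 * allocator counts in 128K units and is initialized with
 * start == length == (S/2) >> 17, i.e. it hands out indices
 * [(S/2) >> 17, S >> 17), which is exactly the upper half [S/2, S) of
 * the address space.
 */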
315static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
316 struct channel_gk20a *ch)
317{
318 struct vm_gk20a *vm = as_share->vm;
319 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
320 struct tegra_vgpu_cmd_msg msg;
321 struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
322 int err;
323
324 gk20a_dbg_fn("");
325
326 ch->vm = vm;
327 msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
328 msg.handle = platform->virt_handle;
329 p->as_handle = vm->handle;
330 p->chan_handle = ch->virt_ctx;
331 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
332
333 if (err || msg.ret) {
334 ch->vm = NULL;
335 err = -ENOMEM;
336 }
337
338 return err;
339}
340
341static void vgpu_cache_maint(u64 handle, u8 op)
342{
343 struct tegra_vgpu_cmd_msg msg;
344 struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
345 int err;
346
347 msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
348 msg.handle = handle;
349 p->op = op;
350 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
351 WARN_ON(err || msg.ret);
352}
353
354static int vgpu_mm_fb_flush(struct gk20a *g)
355{
356 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
357
358 gk20a_dbg_fn("");
359
360 vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
361 return 0;
362}
363
364static void vgpu_mm_l2_invalidate(struct gk20a *g)
365{
366 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
367
368 gk20a_dbg_fn("");
369
370 vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
371}
372
373static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
374{
375 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
376 u8 op;
377
378 gk20a_dbg_fn("");
379
380 if (invalidate)
381 op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
382 else
383 op = TEGRA_VGPU_L2_MAINT_FLUSH;
384
385 vgpu_cache_maint(platform->virt_handle, op);
386}
387
388static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
389{
390 struct gk20a *g = gk20a_from_vm(vm);
391 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
392 struct tegra_vgpu_cmd_msg msg;
393 struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
394 int err;
395
396 gk20a_dbg_fn("");
397
398 /* No need to invalidate if tlb is clean */
399 mutex_lock(&vm->update_gmmu_lock);
400 if (!vm->tlb_dirty) {
401 mutex_unlock(&vm->update_gmmu_lock);
402 return;
403 }
404
405 msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
406 msg.handle = platform->virt_handle;
407 p->handle = vm->handle;
408 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
409 WARN_ON(err || msg.ret);
410 vm->tlb_dirty = false;
411 mutex_unlock(&vm->update_gmmu_lock);
412}
413
414void vgpu_init_mm_ops(struct gpu_ops *gops)
415{
416 gops->mm.gmmu_map = vgpu_locked_gmmu_map;
417 gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
418 gops->mm.vm_remove = vgpu_vm_remove_support;
419 gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
420 gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
421 gops->mm.fb_flush = vgpu_mm_fb_flush;
422 gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
423 gops->mm.l2_flush = vgpu_mm_l2_flush;
424 gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
425}
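For context, the ops installed above are consumed by the common gk20a mm code rather than called directly; the guest never writes PTEs itself, it only asks the server to install a mapping and then flushes the now-stale TLB entries. A minimal caller sketch is shown below, assuming the gk20a mm helpers and the gk20a_mem_flag_* enum referenced above; example_map_buffer() itself is hypothetical and not part of the driver.

#include "vgpu/vgpu.h"

static u64 example_map_buffer(struct vm_gk20a *vm, struct sg_table *sgt,
			      u64 size)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u64 gpu_va;

	/* the "locked" ops expect the caller to hold the gmmu update lock */
	mutex_lock(&vm->update_gmmu_lock);
	/* map_offset == 0 asks the op to allocate the guest VA itself */
	gpu_va = g->ops.mm.gmmu_map(vm, 0, sgt, 0, size,
				    gmmu_page_size_small,
				    0,	/* kind */
				    0,	/* ctag_offset: uncompressed */
				    0,	/* flags */
				    gk20a_mem_flag_none,
				    false /* clear_ctags */);
	mutex_unlock(&vm->update_gmmu_lock);

	/* PTEs were written server-side; drop the stale TLB entries */
	if (gpu_va)
		g->ops.mm.tlb_invalidate(vm);

	return gpu_va;
}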
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
new file mode 100644
index 00000000..cfe307ff
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -0,0 +1,416 @@
1/*
2 * Virtualized GPU
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/kthread.h>
17#include <linux/delay.h>
18#include <linux/dma-mapping.h>
19#include <linux/pm_runtime.h>
20#include "vgpu/vgpu.h"
21#include "gk20a/debug_gk20a.h"
22#include "gk20a/hal_gk20a.h"
23#include "gk20a/hw_mc_gk20a.h"
24
25static inline int vgpu_comm_init(struct platform_device *pdev)
26{
27 size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
28
29 return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3,
30 queue_sizes, TEGRA_VGPU_QUEUE_CMD,
31 ARRAY_SIZE(queue_sizes));
32}
33
34static inline void vgpu_comm_deinit(void)
35{
36 size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
37
38 tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD,
39 ARRAY_SIZE(queue_sizes));
40}
41
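/*
 * Request/reply contract: the caller's msg buffer doubles as the reply.
 * size_in bytes are sent, and on success the first size_out bytes of the
 * server's reply are copied back over msg before the transport buffer is
 * released, so callers reuse the same tegra_vgpu_cmd_msg and then check
 * msg.ret.
 */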
42int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
43 size_t size_out)
44{
45 void *handle;
46 size_t size = size_in;
47 void *data = msg;
48 int err;
49
50 err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT,
51 tegra_gr_comm_get_server_vmid(),
52 TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size);
53 if (!err) {
54 WARN_ON(size < size_out);
55 memcpy(msg, data, size_out);
56 tegra_gr_comm_release(handle);
57 }
58
59 return err;
60}
61
62static u64 vgpu_connect(void)
63{
64 struct tegra_vgpu_cmd_msg msg;
65 struct tegra_vgpu_connect_params *p = &msg.params.connect;
66 int err;
67
68 msg.cmd = TEGRA_VGPU_CMD_CONNECT;
69 p->module = TEGRA_VGPU_MODULE_GPU;
70 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
71
72 return (err || msg.ret) ? 0 : p->handle;
73}
74
75int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value)
76{
77 struct tegra_vgpu_cmd_msg msg;
78 struct tegra_vgpu_attrib_params *p = &msg.params.attrib;
79 int err;
80
81 msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE;
82 msg.handle = handle;
83 p->attrib = attrib;
84 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
85
86 if (err || msg.ret)
87 return -1;
88
89 *value = p->value;
90 return 0;
91}
92
93static int vgpu_intr_thread(void *dev_id)
94{
95 struct gk20a *g = dev_id;
96
97 while (true) {
98 struct tegra_vgpu_intr_msg *msg;
99 u32 sender;
100 void *handle;
101 size_t size;
102 int err;
103
104 err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT,
105 TEGRA_VGPU_QUEUE_INTR, &handle,
106 (void **)&msg, &size, &sender);
107 if (WARN_ON(err))
108 continue;
109
110 if (msg->event == TEGRA_VGPU_EVENT_ABORT) {
111 tegra_gr_comm_release(handle);
112 break;
113 }
114
115 if (msg->unit == TEGRA_VGPU_INTR_GR)
116 vgpu_gr_isr(g, &msg->info.gr_intr);
117
118 tegra_gr_comm_release(handle);
119 }
120
121 while (!kthread_should_stop())
122 msleep(10);
123 return 0;
124}
125
126static void vgpu_remove_support(struct platform_device *dev)
127{
128 struct gk20a *g = get_gk20a(dev);
129 struct gk20a_platform *platform = gk20a_get_platform(dev);
130 struct tegra_vgpu_intr_msg msg;
131 int err;
132
133 if (g->pmu.remove_support)
134 g->pmu.remove_support(&g->pmu);
135
136 if (g->gr.remove_support)
137 g->gr.remove_support(&g->gr);
138
139 if (g->fifo.remove_support)
140 g->fifo.remove_support(&g->fifo);
141
142 if (g->mm.remove_support)
143 g->mm.remove_support(&g->mm);
144
145 msg.event = TEGRA_VGPU_EVENT_ABORT;
146 err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT,
147 TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR,
148 &msg, sizeof(msg));
149 WARN_ON(err);
150 kthread_stop(platform->intr_handler);
151
152 /* free mappings to registers, etc*/
153
154 if (g->bar1) {
155 iounmap(g->bar1);
156 g->bar1 = 0;
157 }
158}
159
160static int vgpu_init_support(struct platform_device *dev)
161{
162 struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, 0);
163 struct gk20a *g = get_gk20a(dev);
164 int err = 0;
165
166 if (!r) {
167		dev_err(dev_from_gk20a(g), "failed to get gk20a bar1\n");
168 err = -ENXIO;
169 goto fail;
170 }
171
172 g->bar1 = devm_request_and_ioremap(&dev->dev, r);
173 if (!g->bar1) {
174 dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
175 err = -ENXIO;
176 goto fail;
177 }
178
179 mutex_init(&g->dbg_sessions_lock);
180 mutex_init(&g->client_lock);
181
182 g->remove_support = vgpu_remove_support;
183 return 0;
184
185 fail:
186 vgpu_remove_support(dev);
187 return err;
188}
189
190int vgpu_pm_prepare_poweroff(struct device *dev)
191{
192 struct platform_device *pdev = to_platform_device(dev);
193 struct gk20a *g = get_gk20a(pdev);
194 int ret = 0;
195
196 gk20a_dbg_fn("");
197
198 if (!g->power_on)
199 return 0;
200
201 ret = gk20a_channel_suspend(g);
202 if (ret)
203 return ret;
204
205 g->power_on = false;
206
207 return ret;
208}
209
210static void vgpu_detect_chip(struct gk20a *g)
211{
212 struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
213 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
214
215 u32 mc_boot_0_value;
216
217 if (vgpu_get_attribute(platform->virt_handle,
218 TEGRA_VGPU_ATTRIB_PMC_BOOT_0,
219 &mc_boot_0_value)) {
220 gk20a_err(dev_from_gk20a(g), "failed to detect chip");
221 return;
222 }
223
224 gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
225 NVHOST_GPU_ARCHITECTURE_SHIFT;
226 gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
227 gpu->rev =
228 (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
229 mc_boot_0_minor_revision_v(mc_boot_0_value);
230
231 gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
232 g->gpu_characteristics.arch,
233 g->gpu_characteristics.impl,
234 g->gpu_characteristics.rev);
235}
236
237static int vgpu_init_hal(struct gk20a *g)
238{
239 u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
240
241 switch (ver) {
242 case GK20A_GPUID_GK20A:
243 gk20a_dbg_info("gk20a detected");
244 /* init gk20a ops then override with virt extensions */
245 gk20a_init_hal(&g->ops);
246 vgpu_init_fifo_ops(&g->ops);
247 vgpu_init_gr_ops(&g->ops);
248 vgpu_init_ltc_ops(&g->ops);
249 vgpu_init_mm_ops(&g->ops);
250 break;
251 default:
252 gk20a_err(&g->dev->dev, "no support for %x", ver);
253 return -ENODEV;
254 }
255
256 return 0;
257}
258
259int vgpu_pm_finalize_poweron(struct device *dev)
260{
261 struct platform_device *pdev = to_platform_device(dev);
262 struct gk20a *g = get_gk20a(pdev);
263 int err;
264
265 gk20a_dbg_fn("");
266
267 if (g->power_on)
268 return 0;
269
270 g->power_on = true;
271
272 vgpu_detect_chip(g);
273 err = vgpu_init_hal(g);
274 if (err)
275 goto done;
276
277 err = vgpu_init_mm_support(g);
278 if (err) {
279 gk20a_err(dev, "failed to init gk20a mm");
280 goto done;
281 }
282
283 err = vgpu_init_fifo_support(g);
284 if (err) {
285 gk20a_err(dev, "failed to init gk20a fifo");
286 goto done;
287 }
288
289 err = vgpu_init_gr_support(g);
290 if (err) {
291 gk20a_err(dev, "failed to init gk20a gr");
292 goto done;
293 }
294
295 err = gk20a_init_gpu_characteristics(g);
296 if (err) {
297 gk20a_err(dev, "failed to init gk20a gpu characteristics");
298 goto done;
299 }
300
301 gk20a_channel_resume(g);
302
303done:
304 return err;
305}
306
307static int vgpu_pm_init(struct platform_device *dev)
308{
309 int err = 0;
310
311 gk20a_dbg_fn("");
312
313 pm_runtime_enable(&dev->dev);
314 return err;
315}
316
317int vgpu_probe(struct platform_device *dev)
318{
319 struct gk20a *gk20a;
320 int err;
321 struct gk20a_platform *platform = gk20a_get_platform(dev);
322
323 if (!platform) {
324 dev_err(&dev->dev, "no platform data\n");
325 return -ENODATA;
326 }
327
328 gk20a_dbg_fn("");
329
330 gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
331 if (!gk20a) {
332 dev_err(&dev->dev, "couldn't allocate gk20a support");
333 return -ENOMEM;
334 }
335
336 platform->g = gk20a;
337 gk20a->dev = dev;
338
339 err = gk20a_user_init(dev);
340 if (err)
341 return err;
342
343 vgpu_init_support(dev);
344
345 init_rwsem(&gk20a->busy_lock);
346
347 spin_lock_init(&gk20a->mc_enable_lock);
348
349 /* Initialize the platform interface. */
350 err = platform->probe(dev);
351 if (err) {
352 dev_err(&dev->dev, "platform probe failed");
353 return err;
354 }
355
356 err = vgpu_pm_init(dev);
357 if (err) {
358 dev_err(&dev->dev, "pm init failed");
359 return err;
360 }
361
362 if (platform->late_probe) {
363 err = platform->late_probe(dev);
364 if (err) {
365 dev_err(&dev->dev, "late probe failed");
366 return err;
367 }
368 }
369
370 err = vgpu_comm_init(dev);
371 if (err) {
372 dev_err(&dev->dev, "failed to init comm interface\n");
373 return -ENOSYS;
374 }
375
376 platform->virt_handle = vgpu_connect();
377 if (!platform->virt_handle) {
378 dev_err(&dev->dev, "failed to connect to server node\n");
379 vgpu_comm_deinit();
380 return -ENOSYS;
381 }
382
383 platform->intr_handler = kthread_run(vgpu_intr_thread, gk20a, "gk20a");
384 if (IS_ERR(platform->intr_handler))
385 return -ENOMEM;
386
387 gk20a_debug_init(dev);
388
389 /* Set DMA parameters to allow larger sgt lists */
390 dev->dev.dma_parms = &gk20a->dma_parms;
391 dma_set_max_seg_size(&dev->dev, UINT_MAX);
392
393 gk20a->gr_idle_timeout_default =
394 CONFIG_GK20A_DEFAULT_TIMEOUT;
395 gk20a->timeouts_enabled = true;
396
397 gk20a_create_sysfs(dev);
398 gk20a_init_gr(gk20a);
399
400 return 0;
401}
402
403int vgpu_remove(struct platform_device *dev)
404{
405 struct gk20a *g = get_gk20a(dev);
406 gk20a_dbg_fn("");
407
408 if (g->remove_support)
409 g->remove_support(dev);
410
411 vgpu_comm_deinit();
412 gk20a_user_deinit(dev);
413 gk20a_get_platform(dev)->g = NULL;
414 kfree(g);
415 return 0;
416}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h
new file mode 100644
index 00000000..445a1c90
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.h
@@ -0,0 +1,41 @@
1/*
2 * Virtualized GPU Interfaces
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _VIRT_H_
17#define _VIRT_H_
18
19#include <linux/tegra_gr_comm.h>
20#include <linux/tegra_vgpu.h>
21#include "gk20a/gk20a.h"
22
23int vgpu_pm_prepare_poweroff(struct device *dev);
24int vgpu_pm_finalize_poweron(struct device *dev);
25int vgpu_probe(struct platform_device *dev);
26int vgpu_remove(struct platform_device *dev);
27u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size);
28int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info);
29void vgpu_init_fifo_ops(struct gpu_ops *gops);
30void vgpu_init_gr_ops(struct gpu_ops *gops);
31void vgpu_init_ltc_ops(struct gpu_ops *gops);
32void vgpu_init_mm_ops(struct gpu_ops *gops);
33int vgpu_init_mm_support(struct gk20a *g);
34int vgpu_init_gr_support(struct gk20a *g);
35int vgpu_init_fifo_support(struct gk20a *g);
36
37int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value);
38int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
39 size_t size_out);
40
41#endif