From 1fd722f592c2e0523c5e399a2406a4e387057188 Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru
Date: Mon, 5 May 2014 21:14:22 -0400
Subject: gpu: nvgpu: support gk20a virtualization

The nvgpu driver now supports using the Tegra graphics virtualization
interfaces to support gk20a in a virtualized environment.

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/Makefile                    |   1 +
 drivers/gpu/nvgpu/gk20a/Makefile              |   1 +
 drivers/gpu/nvgpu/gk20a/as_gk20a.c            |   5 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c       |  89 ++--
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h       |  11 +
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c          |  10 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c          |   9 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c               |  46 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h               |  49 ++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c            |   7 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c            | 115 +++--
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h            |  35 ++
 drivers/gpu/nvgpu/gk20a/platform_gk20a.h      |   9 +
 drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c |  64 +++
 drivers/gpu/nvgpu/gm20b/fifo_gm20b.c          |   9 +
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c            |   5 +
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c            |   9 +
 drivers/gpu/nvgpu/vgpu/Makefile               |  10 +
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c            | 569 +++++++++++++++++++++
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c              | 687 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/vgpu/ltc_vgpu.c             |  55 +++
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c              | 425 ++++++++++++++++
 drivers/gpu/nvgpu/vgpu/vgpu.c                 | 416 ++++++++++++++++
 drivers/gpu/nvgpu/vgpu/vgpu.h                 |  41 ++
 24 files changed, 2576 insertions(+), 101 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/Makefile
 create mode 100644 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/gr_vgpu.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/mm_vgpu.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/vgpu.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/vgpu.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 0fb6090a..6544b315 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -5,3 +5,4 @@ ccflags-y += -Werror
 obj-$(CONFIG_GK20A) += gk20a/
 obj-$(CONFIG_GK20A) += gm20b/
+obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += vgpu/
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
index aa9237b4..fbc9cbec 100644
--- a/drivers/gpu/nvgpu/gk20a/Makefile
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -39,5 +39,6 @@ nvgpu-y := \
 	tsg_gk20a.o
 nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += sync_gk20a.o
+nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += platform_vgpu_tegra.o
 obj-$(CONFIG_GK20A) := nvgpu.o
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 4849dbd5..1a1ca8ff 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -40,6 +40,7 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 static int gk20a_as_alloc_share(struct gk20a_as *as,
 				struct gk20a_as_share **out)
 {
+	struct gk20a *g = gk20a_from_as(as);
 	struct gk20a_as_share *as_share;
 	int err = 0;
@@ -55,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as,
 	as_share->ref_cnt.counter = 1;
 	/* this will set as_share->vm.
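/*
 * A stand-alone sketch of the dispatch pattern this patch builds on (all
 * identifiers below are illustrative stand-ins, not the driver's real
 * types; only the g->ops.fifo.preempt_channel style of call mirrors the
 * patch): common code calls through a per-GPU function-pointer table, so
 * a native probe installs register-level gk20a/gm20b implementations
 * while a virtualized probe installs vgpu_* functions that forward the
 * same request to the GPU server.
 */
#include <stdio.h>

struct gpu;				/* stand-in for struct gk20a */

struct fifo_ops {			/* stand-in for gpu_ops.fifo */
	int (*preempt_channel)(struct gpu *g, unsigned int hw_chid);
};

/* native path: would program the FIFO preempt register directly */
static int native_preempt_channel(struct gpu *g, unsigned int hw_chid)
{
	(void)g;
	printf("native: write the preempt register for channel %u\n", hw_chid);
	return 0;
}

/* virtual path: would send a preempt request to the GPU server */
static int vgpu_preempt_channel(struct gpu *g, unsigned int hw_chid)
{
	(void)g;
	printf("vgpu: ask the server to preempt channel %u\n", hw_chid);
	return 0;
}

/* common code never cares which implementation is installed */
static int preempt(const struct fifo_ops *ops, struct gpu *g, unsigned int chid)
{
	return ops->preempt_channel(g, chid);
}

int main(void)
{
	const struct fifo_ops native = { .preempt_channel = native_preempt_channel };
	const struct fifo_ops virt   = { .preempt_channel = vgpu_preempt_channel };

	preempt(&native, NULL, 3);
	preempt(&virt, NULL, 3);
	return 0;
}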
*/ - err = gk20a_vm_alloc_share(as_share); + err = g->ops.mm.vm_alloc_share(as_share); if (err) goto failed; @@ -106,7 +107,7 @@ static int gk20a_as_ioctl_bind_channel( atomic_inc(&as_share->ref_cnt); /* this will set channel_gk20a->vm */ - err = gk20a_vm_bind_channel(as_share, ch); + err = ch->g->ops.mm.vm_bind_channel(as_share, ch); if (err) { atomic_dec(&as_share->ref_cnt); return err; diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 45757884..669ec294 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -56,16 +56,9 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); static int channel_gk20a_commit_userd(struct channel_gk20a *c); static int channel_gk20a_setup_userd(struct channel_gk20a *c); -static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, - u64 gpfifo_base, u32 gpfifo_entries); static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a); -static int channel_gk20a_alloc_inst(struct gk20a *g, - struct channel_gk20a *ch); -static void channel_gk20a_free_inst(struct gk20a *g, - struct channel_gk20a *ch); - static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add); static void gk20a_free_error_notifiers(struct channel_gk20a *ch); @@ -173,12 +166,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, return -ENOMEM; /* disable channel */ - gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), - gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | - ccsr_channel_enable_clr_true_f()); + c->g->ops.fifo.disable_channel(c); /* preempt the channel */ - WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid)); + WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); /* value field is 8 bits long */ while (value >= 1 << 8) { @@ -206,8 +197,8 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, return 0; } -static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, - u64 gpfifo_base, u32 gpfifo_entries) +int channel_gk20a_setup_ramfc(struct channel_gk20a *c, + u64 gpfifo_base, u32 gpfifo_entries) { void *inst_ptr; @@ -269,7 +260,7 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); - return 0; + return channel_gk20a_commit_userd(c); } static int channel_gk20a_setup_userd(struct channel_gk20a *c) @@ -347,8 +338,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) } } -static int channel_gk20a_alloc_inst(struct gk20a *g, - struct channel_gk20a *ch) +int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) { struct device *d = dev_from_gk20a(g); int err = 0; @@ -384,12 +374,11 @@ static int channel_gk20a_alloc_inst(struct gk20a *g, clean_up: gk20a_err(d, "fail"); - channel_gk20a_free_inst(g, ch); + g->ops.fifo.free_inst(g, ch); return err; } -static void channel_gk20a_free_inst(struct gk20a *g, - struct channel_gk20a *ch) +void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch) { struct device *d = dev_from_gk20a(g); @@ -403,7 +392,16 @@ static void channel_gk20a_free_inst(struct gk20a *g, static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) { - return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true); + return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true); +} + +void channel_gk20a_disable(struct channel_gk20a *ch) +{ + /* disable channel */ + gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), + gk20a_readl(ch->g, + ccsr_channel_r(ch->hw_chid)) | + 
ccsr_channel_enable_clr_true_f()); } void gk20a_channel_abort(struct channel_gk20a *ch) @@ -426,11 +424,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch) } mutex_unlock(&ch->jobs_lock); - /* disable channel */ - gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), - gk20a_readl(ch->g, - ccsr_channel_r(ch->hw_chid)) | - ccsr_channel_enable_clr_true_f()); + ch->g->ops.fifo.disable_channel(ch); if (released_job_semaphore) { wake_up_interruptible_all(&ch->semaphore_wq); @@ -479,7 +473,7 @@ void gk20a_disable_channel(struct channel_gk20a *ch, gk20a_wait_channel_idle(ch); /* preempt the channel */ - gk20a_fifo_preempt_channel(ch->g, ch->hw_chid); + ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid); /* remove channel from runlist */ channel_gk20a_update_runlist(ch, false); @@ -643,7 +637,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish) gk20a_free_error_notifiers(ch); /* release channel ctx */ - gk20a_free_channel_ctx(ch); + g->ops.gr.free_channel_ctx(ch); gk20a_gr_flush_channel_tlb(gr); @@ -683,8 +677,8 @@ unbind: if (gk20a_is_channel_marked_as_tsg(ch)) gk20a_tsg_unbind_channel(ch); - channel_gk20a_unbind(ch); - channel_gk20a_free_inst(g, ch); + g->ops.fifo.unbind_channel(ch); + g->ops.fifo.free_inst(g, ch); ch->vpr = false; ch->vm = NULL; @@ -747,7 +741,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) ch->g = g; - if (channel_gk20a_alloc_inst(g, ch)) { + if (g->ops.fifo.alloc_inst(g, ch)) { ch->in_use = false; gk20a_err(dev_from_gk20a(g), "failed to open gk20a channel, out of inst mem"); @@ -1097,7 +1091,6 @@ static void recycle_priv_cmdbuf(struct channel_gk20a *c) gk20a_dbg_fn("done"); } - int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, struct nvhost_alloc_gpfifo_args *args) { @@ -1181,10 +1174,11 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num); - channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num); - channel_gk20a_setup_userd(c); - channel_gk20a_commit_userd(c); + + err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num); + if (err) + goto clean_up_unmap; /* TBD: setup engine contexts */ @@ -1550,7 +1544,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, /* We don't know what context is currently running... */ /* Note also: there can be more than one context associated with the */ /* address space (vm). */ - gk20a_mm_tlb_invalidate(c->vm); + g->ops.mm.tlb_invalidate(c->vm); /* Make sure we have enough space for gpfifo entries. 
If not, * wait for signals from completed submits */ @@ -1929,7 +1923,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, gk20a_dbg_fn(""); - return gr_gk20a_bind_ctxsw_zcull(g, gr, ch, + return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, args->gpu_va, args->mode); } @@ -1945,7 +1939,7 @@ int gk20a_channel_suspend(struct gk20a *g) gk20a_dbg_fn(""); /* wait for engine idle */ - err = gk20a_fifo_wait_engine_idle(g); + err = g->ops.fifo.wait_engine_idle(g); if (err) return err; @@ -1954,22 +1948,20 @@ int gk20a_channel_suspend(struct gk20a *g) gk20a_dbg_info("suspend channel %d", chid); /* disable channel */ - gk20a_writel(g, ccsr_channel_r(chid), - gk20a_readl(g, ccsr_channel_r(chid)) | - ccsr_channel_enable_clr_true_f()); + g->ops.fifo.disable_channel(&f->channel[chid]); /* preempt the channel */ - gk20a_fifo_preempt_channel(g, chid); + g->ops.fifo.preempt_channel(g, chid); channels_in_use = true; } } if (channels_in_use) { - gk20a_fifo_update_runlist(g, 0, ~0, false, true); + g->ops.fifo.update_runlist(g, 0, ~0, false, true); for (chid = 0; chid < f->num_channels; chid++) { if (f->channel[chid].in_use) - channel_gk20a_unbind(&f->channel[chid]); + g->ops.fifo.unbind_channel(&f->channel[chid]); } } @@ -1996,7 +1988,7 @@ int gk20a_channel_resume(struct gk20a *g) } if (channels_in_use) - gk20a_fifo_update_runlist(g, 0, ~0, true, true); + g->ops.fifo.update_runlist(g, 0, ~0, true, true); gk20a_dbg_fn("done"); return 0; @@ -2074,6 +2066,11 @@ clean_up: void gk20a_init_channel(struct gpu_ops *gops) { gops->fifo.bind_channel = channel_gk20a_bind; + gops->fifo.unbind_channel = channel_gk20a_unbind; + gops->fifo.disable_channel = channel_gk20a_disable; + gops->fifo.alloc_inst = channel_gk20a_alloc_inst; + gops->fifo.free_inst = channel_gk20a_free_inst; + gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; } long gk20a_channel_ioctl(struct file *filp, @@ -2144,7 +2141,7 @@ long gk20a_channel_ioctl(struct file *filp, __func__, cmd); return err; } - err = gk20a_alloc_obj_ctx(ch, + err = ch->g->ops.gr.alloc_obj_ctx(ch, (struct nvhost_alloc_obj_ctx_args *)buf); gk20a_idle(dev); break; @@ -2156,7 +2153,7 @@ long gk20a_channel_ioctl(struct file *filp, __func__, cmd); return err; } - err = gk20a_free_obj_ctx(ch, + err = ch->g->ops.gr.free_obj_ctx(ch, (struct nvhost_free_obj_ctx_args *)buf); gk20a_idle(dev); break; diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 2ea3eccb..37ca8244 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -144,6 +144,10 @@ struct channel_gk20a { void *error_notifier_va; struct gk20a_channel_sync *sync; + +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + u64 virt_ctx; +#endif }; static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) @@ -193,4 +197,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, struct nvhost_alloc_gpfifo_args *args); +void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); +void channel_gk20a_disable(struct channel_gk20a *ch); +int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch); +void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch); +int channel_gk20a_setup_ramfc(struct channel_gk20a *c, + u64 gpfifo_base, u32 gpfifo_entries); + #endif /*__CHANNEL_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index e5628c3f..7338f842 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ 
b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -158,6 +158,9 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg struct zbc_entry *zbc_val; struct zbc_query_params *zbc_tbl; int i, err = 0; +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + struct gk20a_platform *platform = platform_get_drvdata(dev); +#endif gk20a_dbg_fn(""); @@ -197,7 +200,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg if (zcull_info == NULL) return -ENOMEM; - err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info); + err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); if (err) { kfree(zcull_info); break; @@ -219,6 +222,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg case NVHOST_GPU_IOCTL_ZBC_SET_TABLE: set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf; +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + if (platform->virtual_dev) + return -ENOMEM; +#endif + zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL); if (zbc_val == NULL) return -ENOMEM; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 4363129d..e6b3fd5f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -1173,7 +1173,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) gk20a_channel_abort(ch); for (i = 0; i < g->fifo.max_runlists; i++) - gk20a_fifo_update_runlist(g, i, + g->ops.fifo.update_runlist(g, i, hw_chid, false, false); if (gk20a_fifo_set_ctx_mmu_error(g, ch)) @@ -1620,7 +1620,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g, pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat); if (pbdma_chid != ~0) { - err = gk20a_fifo_preempt_channel(g, pbdma_chid); + err = g->ops.fifo.preempt_channel(g, pbdma_chid); if (err) goto clean_up; } @@ -1636,7 +1636,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g, engine_chid = fifo_engine_status_next_id_v(eng_stat); if (engine_chid != ~0 && engine_chid != pbdma_chid) { - err = gk20a_fifo_preempt_channel(g, engine_chid); + err = g->ops.fifo.preempt_channel(g, engine_chid); if (err) goto clean_up; } @@ -1960,6 +1960,9 @@ static void gk20a_fifo_apply_pb_timeout(struct gk20a *g) void gk20a_init_fifo(struct gpu_ops *gops) { gk20a_init_channel(gops); + gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; + gops->fifo.update_runlist = gk20a_fifo_update_runlist; gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault; gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout; + gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 0816878a..3499cc89 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -57,6 +58,9 @@ #include "dbg_gpu_gk20a.h" #include "hal.h" #include "nvhost_acm.h" +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION +#include "vgpu/vgpu.h" +#endif #define CREATE_TRACE_POINTS #include @@ -737,6 +741,17 @@ static int gk20a_init_client(struct platform_device *dev) gk20a_dbg_fn(""); +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + { + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (platform->virtual_dev) { + err = vgpu_pm_finalize_poweron(&dev->dev); + if (err) + return err; + } + } +#endif #ifndef CONFIG_PM_RUNTIME gk20a_pm_finalize_poweron(&dev->dev); #endif @@ -753,6 +768,16 @@ static int gk20a_init_client(struct platform_device *dev) static void gk20a_deinit_client(struct 
platform_device *dev) { gk20a_dbg_fn(""); +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + { + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (platform->virtual_dev) { + vgpu_pm_prepare_poweroff(&dev->dev); + return; + } + } +#endif #ifndef CONFIG_PM_RUNTIME gk20a_pm_prepare_poweroff(&dev->dev); #endif @@ -1006,6 +1031,10 @@ static struct of_device_id tegra_gk20a_of_match[] = { .data = &gk20a_tegra_platform }, { .compatible = "nvidia,tegra210-gm20b", .data = &gm20b_tegra_platform }, +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + { .compatible = "nvidia,tegra124-gk20a-vgpu", + .data = &vgpu_tegra_platform }, +#endif #else { .compatible = "nvidia,tegra124-gk20a", .data = &gk20a_generic_platform }, @@ -1057,7 +1086,7 @@ static int gk20a_create_device( return 0; } -static void gk20a_user_deinit(struct platform_device *dev) +void gk20a_user_deinit(struct platform_device *dev) { struct gk20a *g = get_gk20a(dev); @@ -1098,7 +1127,7 @@ static void gk20a_user_deinit(struct platform_device *dev) class_destroy(g->class); } -static int gk20a_user_init(struct platform_device *dev) +int gk20a_user_init(struct platform_device *dev) { int err; dev_t devno; @@ -1403,6 +1432,11 @@ static int gk20a_probe(struct platform_device *dev) platform_set_drvdata(dev, platform); +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + if (platform->virtual_dev) + return vgpu_probe(dev); +#endif + gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); if (!gk20a) { dev_err(&dev->dev, "couldn't allocate gk20a support"); @@ -1546,8 +1580,16 @@ static int gk20a_probe(struct platform_device *dev) static int __exit gk20a_remove(struct platform_device *dev) { struct gk20a *g = get_gk20a(dev); +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + struct gk20a_platform *platform = gk20a_get_platform(dev); +#endif gk20a_dbg_fn(""); +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + if (platform->virtual_dev) + return vgpu_remove(dev); +#endif + #ifdef CONFIG_INPUT_CFBOOST if (g->boost_added) cfb_remove_device(&dev->dev); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a1080f0b..b813541a 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -131,6 +131,16 @@ struct gpu_ops { u32 reg_offset); int (*load_ctxsw_ucode)(struct gk20a *g); u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); + void (*free_channel_ctx)(struct channel_gk20a *c); + int (*alloc_obj_ctx)(struct channel_gk20a *c, + struct nvhost_alloc_obj_ctx_args *args); + int (*free_obj_ctx)(struct channel_gk20a *c, + struct nvhost_free_obj_ctx_args *args); + int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, + struct channel_gk20a *c, u64 zcull_va, + u32 mode); + int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr, + struct gr_zcull_info *zcull_params); } gr; const char *name; struct { @@ -148,9 +158,20 @@ struct gpu_ops { } clock_gating; struct { void (*bind_channel)(struct channel_gk20a *ch_gk20a); + void (*unbind_channel)(struct channel_gk20a *ch_gk20a); + void (*disable_channel)(struct channel_gk20a *ch); + int (*alloc_inst)(struct gk20a *g, struct channel_gk20a *ch); + void (*free_inst)(struct gk20a *g, struct channel_gk20a *ch); + int (*setup_ramfc)(struct channel_gk20a *c, u64 gpfifo_base, + u32 gpfifo_entries); + int (*preempt_channel)(struct gk20a *g, u32 hw_chid); + int (*update_runlist)(struct gk20a *g, u32 runlist_id, + u32 hw_chid, bool add, + bool wait_for_finish); void (*trigger_mmu_fault)(struct gk20a *g, unsigned long engine_ids); void (*apply_pb_timeout)(struct gk20a *g); + int 
(*wait_engine_idle)(struct gk20a *g); } fifo; struct pmu_v { /*used for change of enum zbc update cmd id from ver 0 to ver1*/ @@ -241,6 +262,31 @@ struct gpu_ops { void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr, u64 size, u32 pgsz_idx); bool (*is_debug_mode_enabled)(struct gk20a *g); + u64 (*gmmu_map)(struct vm_gk20a *vm, + u64 map_offset, + struct sg_table *sgt, + u64 buffer_offset, + u64 size, + int pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + int rw_flag, + bool clear_ctags); + void (*gmmu_unmap)(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + int pgsz_idx, + bool va_allocated, + int rw_flag); + void (*vm_remove)(struct vm_gk20a *vm); + int (*vm_alloc_share)(struct gk20a_as_share *as_share); + int (*vm_bind_channel)(struct gk20a_as_share *as_share, + struct channel_gk20a *ch); + int (*fb_flush)(struct gk20a *g); + void (*l2_invalidate)(struct gk20a *g); + void (*l2_flush)(struct gk20a *g, bool invalidate); + void (*tlb_invalidate)(struct vm_gk20a *vm); } mm; struct { int (*prepare_ucode)(struct gk20a *g); @@ -648,4 +694,7 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name); int gk20a_init_gpu_characteristics(struct gk20a *g); +int gk20a_user_init(struct platform_device *dev); +void gk20a_user_deinit(struct platform_device *dev); + #endif /* _NVHOST_GK20A_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index ef7776df..892a138e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -825,7 +825,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, } } - gk20a_mm_fb_flush(g); + g->ops.mm.fb_flush(g); gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, ch_ctx->zcull_ctx.ctx_sw_mode); @@ -7077,4 +7077,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask; + gops->gr.free_channel_ctx = gk20a_free_channel_ctx; + gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; + gops->gr.free_obj_ctx = gk20a_free_obj_ctx; + gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; + gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 654938b2..3feb675b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -88,7 +88,6 @@ static inline u32 lo32(u64 f) return (u32)(f & 0xffffffff); } -static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); static struct mapped_buffer_node *find_mapped_buffer_locked( struct rb_root *root, u64 addr); static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( @@ -100,7 +99,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, u64 first_vaddr, u64 last_vaddr, u8 kind_v, u32 ctag_offset, bool cacheable, int rw_flag); -static void gk20a_vm_remove_support(struct vm_gk20a *vm); static int gk20a_init_system_vm(struct mm_gk20a *mm); static int gk20a_init_bar1_vm(struct mm_gk20a *mm); @@ -335,6 +333,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) gk20a_init_bar1_vm(mm); gk20a_init_system_vm(mm); + /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ + g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; mm->remove_support = gk20a_remove_mm_support; mm->sw_ready = true; @@ -833,9 +833,9 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) mutex_unlock(&vm->update_gmmu_lock); } -static u64 
gk20a_vm_alloc_va(struct vm_gk20a *vm, - u64 size, - enum gmmu_pgsz_gk20a gmmu_pgsz_idx) +u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, + u64 size, + enum gmmu_pgsz_gk20a gmmu_pgsz_idx) { struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; @@ -881,9 +881,9 @@ static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, return offset; } -static int gk20a_vm_free_va(struct vm_gk20a *vm, - u64 offset, u64 size, - enum gmmu_pgsz_gk20a pgsz_idx) +int gk20a_vm_free_va(struct vm_gk20a *vm, + u64 offset, u64 size, + enum gmmu_pgsz_gk20a pgsz_idx) { struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; u32 page_size = gmmu_page_sizes[pgsz_idx]; @@ -1100,21 +1100,32 @@ static int validate_fixed_buffer(struct vm_gk20a *vm, return 0; } -static u64 __locked_gmmu_map(struct vm_gk20a *vm, - u64 map_offset, - struct sg_table *sgt, - u64 buffer_offset, - u64 size, - int pgsz_idx, - u8 kind_v, - u32 ctag_offset, - u32 flags, - int rw_flag) +u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, + u64 map_offset, + struct sg_table *sgt, + u64 buffer_offset, + u64 size, + int pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + int rw_flag, + bool clear_ctags) { int err = 0, i = 0; bool allocated = false; u32 pde_lo, pde_hi; struct device *d = dev_from_vm(vm); + struct gk20a *g = gk20a_from_vm(vm); + + if (clear_ctags && ctag_offset) { + u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >> + COMP_TAG_LINE_SIZE_SHIFT; + + /* init/clear the ctag buffer */ + g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, + ctag_offset, ctag_offset + ctag_lines - 1); + } /* Allocate (or validate when map_offset != 0) the virtual address. */ if (!map_offset) { @@ -1167,12 +1178,12 @@ fail_alloc: return 0; } -static void __locked_gmmu_unmap(struct vm_gk20a *vm, - u64 vaddr, - u64 size, - int pgsz_idx, - bool va_allocated, - int rw_flag) +void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + int pgsz_idx, + bool va_allocated, + int rw_flag) { int err = 0; struct gk20a *g = gk20a_from_vm(vm); @@ -1298,6 +1309,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, struct buffer_attrs bfr = {0}; struct gk20a_comptags comptags; u64 buf_addr; + bool clear_ctags = false; mutex_lock(&vm->update_gmmu_lock); @@ -1402,11 +1414,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, bfr.kind_v = bfr.uc_kind_v; } else { gk20a_get_comptags(d, dmabuf, &comptags); - - /* init/clear the ctag buffer */ - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, - comptags.offset, - comptags.offset + comptags.lines - 1); + clear_ctags = true; } } @@ -1414,15 +1422,15 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, bfr.ctag_offset = comptags.offset; /* update gmmu ptes */ - map_offset = __locked_gmmu_map(vm, map_offset, + map_offset = g->ops.mm.gmmu_map(vm, map_offset, bfr.sgt, buffer_offset, /* sg offset */ mapping_size, bfr.pgsz_idx, bfr.kind_v, bfr.ctag_offset, - flags, rw_flag); - + flags, rw_flag, + clear_ctags); if (!map_offset) goto clean_up; @@ -1531,17 +1539,18 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, u32 flags, int rw_flag) { + struct gk20a *g = gk20a_from_vm(vm); u64 vaddr; mutex_lock(&vm->update_gmmu_lock); - vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */ + vaddr = g->ops.mm.gmmu_map(vm, 0, /* already mapped? - No */ *sgt, /* sg table */ 0, /* sg offset */ size, 0, /* page size index = 0 i.e. 
SZ_4K */ 0, /* kind */ 0, /* ctag_offset */ - flags, rw_flag); + flags, rw_flag, false); mutex_unlock(&vm->update_gmmu_lock); if (!vaddr) { gk20a_err(dev_from_vm(vm), "failed to allocate va space"); @@ -1549,7 +1558,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, } /* Invalidate kernel mappings immediately */ - gk20a_mm_tlb_invalidate(vm); + g->ops.mm.tlb_invalidate(vm); return vaddr; } @@ -1573,8 +1582,10 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, u64 size, int rw_flag) { + struct gk20a *g = gk20a_from_vm(vm); + mutex_lock(&vm->update_gmmu_lock); - __locked_gmmu_unmap(vm, + g->ops.mm.gmmu_unmap(vm, vaddr, size, 0, /* page size 4K */ @@ -1970,10 +1981,10 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr, } for (i = 0; i < num_pages; i++) { - u64 page_vaddr = __locked_gmmu_map(vm, vaddr, + u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr, vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0, NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET, - gk20a_mem_flag_none); + gk20a_mem_flag_none, false); if (!page_vaddr) { gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!"); @@ -1990,7 +2001,7 @@ err_unmap: /* something went wrong. unmap pages */ while (i--) { vaddr -= pgsz; - __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0, + g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0, gk20a_mem_flag_none); } @@ -2005,12 +2016,14 @@ static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr, u64 size, u32 pgsz_idx) { - __locked_gmmu_unmap(vm, vaddr, size, pgsz_idx, - false, gk20a_mem_flag_none); + struct gk20a *g = vm->mm->g; + + g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx, + false, gk20a_mem_flag_none); } /* NOTE! mapped_buffers lock must be held */ -static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) +void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) { struct vm_gk20a *vm = mapped_buffer->vm; struct gk20a *g = vm->mm->g; @@ -2026,7 +2039,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) if (g->ops.mm.put_empty) { g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx); } else { - __locked_gmmu_unmap(vm, + g->ops.mm.gmmu_unmap(vm, mapped_buffer->addr, mapped_buffer->size, mapped_buffer->pgsz_idx, @@ -2036,7 +2049,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) num_pages, pgsz_idx, false); } } else - __locked_gmmu_unmap(vm, + g->ops.mm.gmmu_unmap(vm, mapped_buffer->addr, mapped_buffer->size, mapped_buffer->pgsz_idx, @@ -2085,7 +2098,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) mutex_unlock(&vm->update_gmmu_lock); } -static void gk20a_vm_remove_support(struct vm_gk20a *vm) +void gk20a_vm_remove_support(struct vm_gk20a *vm) { struct gk20a *g = vm->mm->g; struct mapped_buffer_node *mapped_buffer; @@ -2156,7 +2169,8 @@ static void gk20a_vm_remove_support(struct vm_gk20a *vm) static void gk20a_vm_remove_support_kref(struct kref *ref) { struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); - gk20a_vm_remove_support(vm); + struct gk20a *g = gk20a_from_vm(vm); + g->ops.mm.vm_remove(vm); } void gk20a_vm_get(struct vm_gk20a *vm) @@ -3124,5 +3138,14 @@ void gk20a_init_mm(struct gpu_ops *gops) gops->mm.put_empty = gk20a_vm_put_empty; gops->mm.clear_sparse = gk20a_vm_clear_sparse; gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; + gops->mm.gmmu_map = gk20a_locked_gmmu_map; + gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; + gops->mm.vm_remove = gk20a_vm_remove_support; + gops->mm.vm_alloc_share = 
gk20a_vm_alloc_share; + gops->mm.vm_bind_channel = gk20a_vm_bind_channel; + gops->mm.fb_flush = gk20a_mm_fb_flush; + gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; + gops->mm.l2_flush = gk20a_mm_l2_flush; + gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b8726c62..f06c465a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -318,6 +318,10 @@ struct vm_gk20a { dma_addr_t zero_page_iova; void *zero_page_cpuva; struct sg_table *zero_page_sgt; + +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + u64 handle; +#endif }; struct gk20a; @@ -438,11 +442,30 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, u32 flags, int rw_flag); +u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, + u64 map_offset, + struct sg_table *sgt, + u64 buffer_offset, + u64 size, + int pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + int rw_flag, + bool clear_ctags); + void gk20a_gmmu_unmap(struct vm_gk20a *vm, u64 vaddr, u64 size, int rw_flag); +void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + int pgsz_idx, + bool va_allocated, + int rw_flag); + struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); @@ -461,6 +484,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, /* unmap handle from kernel */ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); +void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); + /* get reference to all currently mapped buffers */ int gk20a_vm_get_buffers(struct vm_gk20a *vm, struct mapped_buffer_node ***mapped_buffers, @@ -482,6 +507,16 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, void gk20a_vm_get(struct vm_gk20a *vm); void gk20a_vm_put(struct vm_gk20a *vm); +void gk20a_vm_remove_support(struct vm_gk20a *vm); + +u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, + u64 size, + enum gmmu_pgsz_gk20a gmmu_pgsz_idx); + +int gk20a_vm_free_va(struct vm_gk20a *vm, + u64 offset, u64 size, + enum gmmu_pgsz_gk20a pgsz_idx); + /* vm-as interface */ struct nvhost_as_alloc_space_args; struct nvhost_as_free_space_args; diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index 6dd0c0db..e6ed9898 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h @@ -151,6 +151,12 @@ struct gk20a_platform { * of the CPU. */ void (*dump_platform_dependencies)(struct platform_device *dev); + +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + bool virtual_dev; + u64 virt_handle; + struct task_struct *intr_handler; +#endif }; static inline struct gk20a_platform *gk20a_get_platform( @@ -163,6 +169,9 @@ extern struct gk20a_platform gk20a_generic_platform; #ifdef CONFIG_TEGRA_GK20A extern struct gk20a_platform gk20a_tegra_platform; extern struct gk20a_platform gm20b_tegra_platform; +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION +extern struct gk20a_platform vgpu_tegra_platform; +#endif #endif static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev) diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c new file mode 100644 index 00000000..ea4fde79 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c @@ -0,0 +1,64 @@ +/* + * Tegra Virtualized GPU Platform Interface + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include "gk20a.h" +#include "hal_gk20a.h" +#include "platform_gk20a.h" + +static int gk20a_tegra_probe(struct platform_device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct device_node *np = dev->dev.of_node; + const __be32 *host1x_ptr; + struct platform_device *host1x_pdev = NULL; + + host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); + if (host1x_ptr) { + struct device_node *host1x_node = + of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); + + host1x_pdev = of_find_device_by_node(host1x_node); + if (!host1x_pdev) { + dev_warn(&dev->dev, "host1x device not available"); + return -EPROBE_DEFER; + } + + } else { + host1x_pdev = to_platform_device(dev->dev.parent); + dev_warn(&dev->dev, "host1x reference not found. assuming host1x to be parent"); + } + + platform->g->host1x_dev = host1x_pdev; + + return 0; +} + +struct gk20a_platform vgpu_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .can_railgate = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_elcg = false, + .enable_elpg = false, + .enable_aelpg = false, + + .probe = gk20a_tegra_probe, + + .virtual_dev = true, +}; diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 7e580136..86d049cf 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -102,5 +102,14 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g, void gm20b_init_fifo(struct gpu_ops *gops) { gops->fifo.bind_channel = channel_gm20b_bind; + gops->fifo.unbind_channel = channel_gk20a_unbind; + gops->fifo.disable_channel = channel_gk20a_disable; + gops->fifo.alloc_inst = channel_gk20a_alloc_inst; + gops->fifo.free_inst = channel_gk20a_free_inst; + gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; + + gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; + gops->fifo.update_runlist = gk20a_fifo_update_runlist; gops->fifo.trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault; + gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; } diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 72500b0e..c9c32b9f 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -751,4 +751,9 @@ void gm20b_init_gr(struct gpu_ops *gops) gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; #endif gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask; + gops->gr.free_channel_ctx = gk20a_free_channel_ctx; + gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; + gops->gr.free_obj_ctx = gk20a_free_obj_ctx; + gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; + gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; } diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index ac82d56a..ed5b5e0d 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -327,4 +327,13 @@ void gm20b_init_mm(struct gpu_ops *gops) gops->mm.set_sparse = gm20b_vm_put_sparse; gops->mm.clear_sparse = gm20b_vm_clear_sparse; 
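/*
 * Every vgpu_* function added below follows one request/response pattern:
 * fill a fixed-size message with a command id, the GPU's virtualization
 * handle and per-command parameters, hand it to vgpu_comm_sendrecv(), and
 * treat a transport error or a non-zero msg.ret as failure. A stand-alone
 * model of that pattern follows; the types, the command id and the stub
 * transport are illustrative only, and just the cmd/handle/params/ret
 * field layout mirrors the patch.
 */
#include <stdio.h>
#include <string.h>

enum { CMD_CHANNEL_PREEMPT = 1 };

struct channel_config_params {
	unsigned long long handle;	/* the channel's server-side context */
};

struct cmd_msg {
	unsigned int cmd;
	int ret;			/* filled in by the server */
	unsigned long long handle;	/* per-GPU virtualization handle */
	union {
		struct channel_config_params channel_config;
	} params;
};

/* stub transport: a real one would send the message and block for the reply */
static int comm_sendrecv(struct cmd_msg *msg, size_t size_out, size_t size_in)
{
	(void)size_out;
	(void)size_in;
	printf("cmd %u sent for channel context %llu\n",
	       msg->cmd, msg->params.channel_config.handle);
	msg->ret = 0;
	return 0;
}

static int channel_preempt(unsigned long long gpu_handle,
			   unsigned long long channel_ctx)
{
	struct cmd_msg msg;
	int err;

	memset(&msg, 0, sizeof(msg));
	msg.cmd = CMD_CHANNEL_PREEMPT;
	msg.handle = gpu_handle;
	msg.params.channel_config.handle = channel_ctx;
	err = comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	/* the error convention used throughout the vgpu code */
	return (err || msg.ret) ? -1 : 0;
}

int main(void)
{
	return channel_preempt(0x1000, 42);
}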
gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; + gops->mm.gmmu_map = gk20a_locked_gmmu_map; + gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; + gops->mm.vm_remove = gk20a_vm_remove_support; + gops->mm.vm_alloc_share = gk20a_vm_alloc_share; + gops->mm.vm_bind_channel = gk20a_vm_bind_channel; + gops->mm.fb_flush = gk20a_mm_fb_flush; + gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; + gops->mm.l2_flush = gk20a_mm_l2_flush; + gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; } diff --git a/drivers/gpu/nvgpu/vgpu/Makefile b/drivers/gpu/nvgpu/vgpu/Makefile new file mode 100644 index 00000000..edad7171 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/Makefile @@ -0,0 +1,10 @@ +GCOV_PROFILE := y +ccflags-y += -Idrivers/gpu/nvgpu +ccflags-y += -Wno-multichar + +obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) = \ + ltc_vgpu.o \ + gr_vgpu.o \ + fifo_vgpu.o \ + mm_vgpu.o \ + vgpu.o diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c new file mode 100644 index 00000000..23dec1f3 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -0,0 +1,569 @@ +/* + * Virtualized GPU Fifo + * + * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include "vgpu/vgpu.h" +#include "gk20a/hw_fifo_gk20a.h" +#include "gk20a/hw_ram_gk20a.h" + +static void vgpu_channel_bind(struct channel_gk20a *ch) +{ + struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + + gk20a_dbg_info("bind channel %d", ch->hw_chid); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND; + msg.handle = platform->virt_handle; + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + ch->bound = true; +} + +static void vgpu_channel_unbind(struct channel_gk20a *ch) +{ + struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); + + gk20a_dbg_fn(""); + + if (ch->bound) { + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND; + msg.handle = platform->virt_handle; + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + } + + ch->bound = false; + + /* + * if we are agrressive then we can destroy the syncpt + * resource at this point + * if not, then it will be destroyed at channel_free() + */ + if (ch->sync && ch->sync->aggressive_destroy) { + ch->sync->destroy(ch->sync); + ch->sync = NULL; + } +} + +static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX; + msg.handle = platform->virt_handle; + p->id = ch->hw_chid; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + 
gk20a_err(dev_from_gk20a(g), "fail"); + return -ENOMEM; + } + + ch->virt_ctx = p->handle; + gk20a_dbg_fn("done"); + return 0; +} + +static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX; + msg.handle = platform->virt_handle; + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} + +static void vgpu_channel_disable(struct channel_gk20a *ch) +{ + struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE; + msg.handle = platform->virt_handle; + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} + +static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base, + u32 gpfifo_entries) +{ + struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); + struct device __maybe_unused *d = dev_from_gk20a(ch->g); + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC; + msg.handle = platform->virt_handle; + p->handle = ch->virt_ctx; + p->gpfifo_va = gpfifo_base; + p->num_entries = gpfifo_entries; + p->userd_addr = ch->userd_iova; + p->iova = mapping ? 1 : 0; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? 
-ENOMEM : 0; +} + +static int init_engine_info(struct fifo_gk20a *f) +{ + struct fifo_engine_info_gk20a *gr_info; + const u32 gr_sw_id = ENGINE_GR_GK20A; + + gk20a_dbg_fn(""); + + /* all we really care about finding is the graphics entry */ + /* especially early on in sim it probably thinks it has more */ + f->num_engines = 1; + + gr_info = f->engine_info + gr_sw_id; + + gr_info->sw_id = gr_sw_id; + gr_info->name = "gr"; + /* FIXME: retrieve this from server */ + gr_info->runlist_id = 0; + return 0; +} + +static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) +{ + struct fifo_engine_info_gk20a *engine_info; + struct fifo_runlist_info_gk20a *runlist; + struct device *d = dev_from_gk20a(g); + u32 runlist_id; + u32 i; + u64 runlist_size; + + gk20a_dbg_fn(""); + + f->max_runlists = fifo_eng_runlist_base__size_1_v(); + f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) * + f->max_runlists, GFP_KERNEL); + if (!f->runlist_info) + goto clean_up; + + engine_info = f->engine_info + ENGINE_GR_GK20A; + runlist_id = engine_info->runlist_id; + runlist = &f->runlist_info[runlist_id]; + + runlist->active_channels = + kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), + GFP_KERNEL); + if (!runlist->active_channels) + goto clean_up_runlist_info; + + runlist_size = sizeof(u16) * f->num_channels; + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + dma_addr_t iova; + + runlist->mem[i].cpuva = + dma_alloc_coherent(d, + runlist_size, + &iova, + GFP_KERNEL); + if (!runlist->mem[i].cpuva) { + dev_err(d, "memory allocation failed\n"); + goto clean_up_runlist; + } + runlist->mem[i].iova = iova; + runlist->mem[i].size = runlist_size; + } + mutex_init(&runlist->mutex); + init_waitqueue_head(&runlist->runlist_wq); + + /* None of buffers is pinned if this value doesn't change. + Otherwise, one of them (cur_buffer) must have been pinned. 
*/ + runlist->cur_buffer = MAX_RUNLIST_BUFFERS; + + gk20a_dbg_fn("done"); + return 0; + +clean_up_runlist: + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + if (runlist->mem[i].cpuva) + dma_free_coherent(d, + runlist->mem[i].size, + runlist->mem[i].cpuva, + runlist->mem[i].iova); + runlist->mem[i].cpuva = NULL; + runlist->mem[i].iova = 0; + } + + kfree(runlist->active_channels); + runlist->active_channels = NULL; + +clean_up_runlist_info: + kfree(f->runlist_info); + f->runlist_info = NULL; + +clean_up: + gk20a_dbg_fn("fail"); + return -ENOMEM; +} + +static int vgpu_init_fifo_setup_sw(struct gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct fifo_gk20a *f = &g->fifo; + struct device *d = dev_from_gk20a(g); + int chid, err = 0; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (f->sw_ready) { + gk20a_dbg_fn("skip init"); + return 0; + } + + f->g = g; + + err = vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_NUM_CHANNELS, + &f->num_channels); + if (err) + return -ENXIO; + + f->max_engines = ENGINE_INVAL_GK20A; + + f->userd_entry_size = 1 << ram_userd_base_shift_v(); + f->userd_total_size = f->userd_entry_size * f->num_channels; + + f->userd.cpuva = dma_alloc_coherent(d, + f->userd_total_size, + &iova, + GFP_KERNEL); + if (!f->userd.cpuva) { + dev_err(d, "memory allocation failed\n"); + goto clean_up; + } + + f->userd.iova = iova; + err = gk20a_get_sgtable(d, &f->userd.sgt, + f->userd.cpuva, f->userd.iova, + f->userd_total_size); + if (err) { + dev_err(d, "failed to create sg table\n"); + goto clean_up; + } + + /* bar1 va */ + f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size); + if (!f->userd.gpu_va) { + dev_err(d, "gmmu mapping failed\n"); + goto clean_up; + } + + gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); + + f->userd.size = f->userd_total_size; + + f->channel = kzalloc(f->num_channels * sizeof(*f->channel), + GFP_KERNEL); + f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), + GFP_KERNEL); + + if (!(f->channel && f->engine_info)) { + err = -ENOMEM; + goto clean_up; + } + + init_engine_info(f); + + init_runlist(g, f); + + for (chid = 0; chid < f->num_channels; chid++) { + f->channel[chid].userd_cpu_va = + f->userd.cpuva + chid * f->userd_entry_size; + f->channel[chid].userd_iova = + NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) + + chid * f->userd_entry_size; + f->channel[chid].userd_gpu_va = + f->userd.gpu_va + chid * f->userd_entry_size; + + gk20a_init_channel_support(g, chid); + } + mutex_init(&f->ch_inuse_mutex); + + f->deferred_reset_pending = false; + mutex_init(&f->deferred_reset_mutex); + + f->sw_ready = true; + + gk20a_dbg_fn("done"); + return 0; + +clean_up: + gk20a_dbg_fn("fail"); + /* FIXME: unmap from bar1 */ + if (f->userd.sgt) + gk20a_free_sgtable(&f->userd.sgt); + if (f->userd.cpuva) + dma_free_coherent(d, + f->userd_total_size, + f->userd.cpuva, + f->userd.iova); + f->userd.cpuva = NULL; + f->userd.iova = 0; + + memset(&f->userd, 0, sizeof(struct userd_desc)); + + kfree(f->channel); + f->channel = NULL; + kfree(f->engine_info); + f->engine_info = NULL; + + return err; +} + +static int vgpu_init_fifo_setup_hw(struct gk20a *g) +{ + gk20a_dbg_fn(""); + + /* test write, read through bar1 @ userd region before + * turning on the snooping */ + { + struct fifo_gk20a *f = &g->fifo; + u32 v, v1 = 0x33, v2 = 0x55; + + u32 bar1_vaddr = f->userd.gpu_va; + volatile u32 *cpu_vaddr = f->userd.cpuva; + + gk20a_dbg_info("test bar1 @ vaddr 0x%x", + bar1_vaddr); + + v = gk20a_bar1_readl(g, 
bar1_vaddr); + + *cpu_vaddr = v1; + smp_mb(); + + if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { + gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!"); + return -EINVAL; + } + + gk20a_bar1_writel(g, bar1_vaddr, v2); + + if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) { + gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!"); + return -EINVAL; + } + + /* is it visible to the cpu? */ + if (*cpu_vaddr != v2) { + gk20a_err(dev_from_gk20a(g), + "cpu didn't see bar1 write @ %p!", + cpu_vaddr); + } + + /* put it back */ + gk20a_bar1_writel(g, bar1_vaddr, v); + } + + gk20a_dbg_fn("done"); + + return 0; +} + +int vgpu_init_fifo_support(struct gk20a *g) +{ + u32 err; + + gk20a_dbg_fn(""); + + err = vgpu_init_fifo_setup_sw(g); + if (err) + return err; + + err = vgpu_init_fifo_setup_hw(g); + return err; +} + +static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct fifo_gk20a *f = &g->fifo; + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT; + msg.handle = platform->virt_handle; + p->handle = f->channel[hw_chid].virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + if (err || msg.ret) { + gk20a_err(dev_from_gk20a(g), + "preempt channel %d failed\n", hw_chid); + err = -ENOMEM; + } + + return err; +} + +static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist, + u32 num_entries) +{ + struct tegra_vgpu_cmd_msg *msg; + struct tegra_vgpu_runlist_params *p; + size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries; + char *ptr; + int err; + + msg = kmalloc(size, GFP_KERNEL); + if (!msg) + return -1; + + msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST; + msg->handle = handle; + p = &msg->params.runlist; + p->runlist_id = runlist_id; + p->num_entries = num_entries; + + ptr = (char *)msg + sizeof(*msg); + memcpy(ptr, runlist, sizeof(*runlist) * num_entries); + err = vgpu_comm_sendrecv(msg, size, sizeof(*msg)); + + err = (err || msg->ret) ? -1 : 0; + kfree(msg); + return err; +} + +static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, + u32 hw_chid, bool add, + bool wait_for_finish) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct fifo_gk20a *f = &g->fifo; + struct fifo_runlist_info_gk20a *runlist; + u16 *runlist_entry = NULL; + u32 count = 0; + + gk20a_dbg_fn(""); + + runlist = &f->runlist_info[runlist_id]; + + /* valid channel, add/remove it from active list. + Otherwise, keep active list untouched for suspend/resume. */ + if (hw_chid != ~0) { + if (add) { + if (test_and_set_bit(hw_chid, + runlist->active_channels) == 1) + return 0; + } else { + if (test_and_clear_bit(hw_chid, + runlist->active_channels) == 0) + return 0; + } + } + + if (hw_chid != ~0 || /* add/remove a valid channel */ + add /* resume to add all channels back */) { + u32 chid; + + runlist_entry = runlist->mem[0].cpuva; + for_each_set_bit(chid, + runlist->active_channels, f->num_channels) { + gk20a_dbg_info("add channel %d to runlist", chid); + runlist_entry[0] = chid; + runlist_entry++; + count++; + } + } else /* suspend to remove all channels */ + count = 0; + + return vgpu_submit_runlist(platform->virt_handle, runlist_id, + runlist->mem[0].cpuva, count); +} + +/* add/remove a channel from runlist + special cases below: runlist->active_channels will NOT be changed. + (hw_chid == ~0 && !add) means remove all active channels from runlist. 
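/*
 * What vgpu_fifo_update_runlist_locked() above prepares before calling
 * vgpu_submit_runlist(): active channels live in a bitmap, and the
 * runlist sent to the server is simply the set bit positions emitted as
 * 16-bit channel ids (an empty list, as on suspend, removes everything).
 * A compact stand-alone model follows; the bitmap helpers are simplified
 * stand-ins for the kernel's test_and_set_bit()/for_each_set_bit().
 */
#include <stdio.h>

#define NUM_CHANNELS	128
#define BITS_PER_WORD	(8 * sizeof(unsigned long))

static unsigned long active[NUM_CHANNELS / BITS_PER_WORD + 1];

static void set_active(unsigned int chid, int add)
{
	unsigned long mask = 1UL << (chid % BITS_PER_WORD);

	if (add)
		active[chid / BITS_PER_WORD] |= mask;
	else
		active[chid / BITS_PER_WORD] &= ~mask;
}

/* walk the bitmap and emit one u16 runlist entry per active channel */
static unsigned int build_runlist(unsigned short *entries)
{
	unsigned int chid, count = 0;

	for (chid = 0; chid < NUM_CHANNELS; chid++)
		if (active[chid / BITS_PER_WORD] & (1UL << (chid % BITS_PER_WORD)))
			entries[count++] = (unsigned short)chid;
	return count;
}

int main(void)
{
	unsigned short entries[NUM_CHANNELS];
	unsigned int i, count;

	set_active(3, 1);
	set_active(17, 1);
	count = build_runlist(entries);
	for (i = 0; i < count; i++)
		printf("runlist[%u] = channel %u\n", i, (unsigned int)entries[i]);
	return 0;
}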
+ (hw_chid == ~0 && add) means restore all active channels on runlist. */ +static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id, + u32 hw_chid, bool add, bool wait_for_finish) +{ + struct fifo_runlist_info_gk20a *runlist = NULL; + struct fifo_gk20a *f = &g->fifo; + u32 ret = 0; + + gk20a_dbg_fn(""); + + runlist = &f->runlist_info[runlist_id]; + + mutex_lock(&runlist->mutex); + + ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add, + wait_for_finish); + + mutex_unlock(&runlist->mutex); + return ret; +} + +static int vgpu_fifo_wait_engine_idle(struct gk20a *g) +{ + gk20a_dbg_fn(""); + + return 0; +} + +void vgpu_init_fifo_ops(struct gpu_ops *gops) +{ + gops->fifo.bind_channel = vgpu_channel_bind; + gops->fifo.unbind_channel = vgpu_channel_unbind; + gops->fifo.disable_channel = vgpu_channel_disable; + gops->fifo.alloc_inst = vgpu_channel_alloc_inst; + gops->fifo.free_inst = vgpu_channel_free_inst; + gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc; + gops->fifo.preempt_channel = vgpu_fifo_preempt_channel; + gops->fifo.update_runlist = vgpu_fifo_update_runlist; + gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle; +} + diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c new file mode 100644 index 00000000..a7e966da --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -0,0 +1,687 @@ +/* + * Virtualized GPU Graphics + * + * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include "vgpu/vgpu.h" +#include "gk20a/hw_gr_gk20a.h" + +static int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va) +{ + struct gk20a_platform *platform = gk20a_get_platform(c->g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? -1 : 0; +} + +static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g, + struct channel_gk20a *c, bool patch) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? -1 : 0; +} + +/* load saved fresh copy of gloden image into channel gr_ctx */ +static int vgpu_gr_load_golden_ctx_image(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? 
-1 : 0; +} + +static int vgpu_gr_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + + gk20a_dbg_fn(""); + + vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_GOLDEN_CTX_SIZE, + &g->gr.ctx_vars.golden_image_size); + vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_ZCULL_CTX_SIZE, + &g->gr.ctx_vars.zcull_ctxsw_image_size); + if (!g->gr.ctx_vars.golden_image_size || + !g->gr.ctx_vars.zcull_ctxsw_image_size) + return -ENXIO; + + gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size; + g->gr.ctx_vars.priv_access_map_size = 512 * 1024; + return 0; +} + +static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + int attr_buffer_size; + + u32 cb_buffer_size = gr->bundle_cb_default_size * + gr_scc_bundle_cb_size_div_256b_byte_granularity_v(); + + u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() * + gr_scc_pagepool_total_pages_byte_granularity_v(); + + gk20a_dbg_fn(""); + + attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g); + + gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size); + gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size; + + gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size); + gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size; + + gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size); + gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size; + + gk20a_dbg_info("priv access map size : %d", + gr->ctx_vars.priv_access_map_size); + gr->global_ctx_buffer[PRIV_ACCESS_MAP].size = + gr->ctx_vars.priv_access_map_size; + + return 0; +} + +static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + struct vm_gk20a *ch_vm = c->vm; + u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; + u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; + struct gr_gk20a *gr = &g->gr; + u64 gpu_va; + u32 i; + int err; + + gk20a_dbg_fn(""); + + /* FIXME: add VPR support */ + + /* Circular Buffer */ + gpu_va = gk20a_vm_alloc_va(ch_vm, + gr->global_ctx_buffer[CIRCULAR].size, 0); + + if (!gpu_va) + goto clean_up; + g_bfr_va[CIRCULAR_VA] = gpu_va; + g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].size; + + /* Attribute Buffer */ + gpu_va = gk20a_vm_alloc_va(ch_vm, + gr->global_ctx_buffer[ATTRIBUTE].size, 0); + + if (!gpu_va) + goto clean_up; + g_bfr_va[ATTRIBUTE_VA] = gpu_va; + g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].size; + + /* Page Pool */ + gpu_va = gk20a_vm_alloc_va(ch_vm, + gr->global_ctx_buffer[PAGEPOOL].size, 0); + if (!gpu_va) + goto clean_up; + g_bfr_va[PAGEPOOL_VA] = gpu_va; + g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].size; + + /* Priv register Access Map */ + gpu_va = gk20a_vm_alloc_va(ch_vm, + gr->global_ctx_buffer[PRIV_ACCESS_MAP].size, 0); + if (!gpu_va) + goto clean_up; + g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; + g_bfr_size[PRIV_ACCESS_MAP_VA] = + gr->global_ctx_buffer[PRIV_ACCESS_MAP].size; + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + p->cb_va = g_bfr_va[CIRCULAR_VA]; + p->attr_va = g_bfr_va[ATTRIBUTE_VA]; + p->page_pool_va = g_bfr_va[PAGEPOOL_VA]; + p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA]; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) + goto 
clean_up; + + c->ch_ctx.global_ctx_buffer_mapped = true; + return 0; + + clean_up: + for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { + if (g_bfr_va[i]) { + gk20a_vm_free_va(ch_vm, g_bfr_va[i], + g_bfr_size[i], 0); + g_bfr_va[i] = 0; + } + } + return -ENOMEM; +} + +static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) +{ + struct gk20a_platform *platform = gk20a_get_platform(c->g->dev); + struct vm_gk20a *ch_vm = c->vm; + u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; + u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; + u32 i; + + gk20a_dbg_fn(""); + + if (c->ch_ctx.global_ctx_buffer_mapped) { + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + } + + for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { + if (g_bfr_va[i]) { + gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i], 0); + g_bfr_va[i] = 0; + g_bfr_size[i] = 0; + } + } + c->ch_ctx.global_ctx_buffer_mapped = false; +} + +static int vgpu_gr_alloc_channel_gr_ctx(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + struct gr_gk20a *gr = &g->gr; + struct gr_ctx_desc *gr_ctx; + struct vm_gk20a *ch_vm = c->vm; + int err; + + gk20a_dbg_fn(""); + + if (gr->ctx_vars.buffer_size == 0) + return 0; + + /* alloc channel gr ctx buffer */ + gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; + gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; + + gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL); + if (!gr_ctx) + return -ENOMEM; + + gr_ctx->size = gr->ctx_vars.buffer_total_size; + gr_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, gr_ctx->size, 0); + + if (!gr_ctx->gpu_va) { + kfree(gr_ctx); + return -ENOMEM; + } + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + p->gr_ctx_va = gr_ctx->gpu_va; + p->class_num = c->obj_class; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + if (err || msg.ret) { + gk20a_vm_free_va(ch_vm, gr_ctx->gpu_va, gr_ctx->size, 0); + err = -ENOMEM; + } else + c->ch_ctx.gr_ctx = gr_ctx; + + return err; +} + +static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c) +{ + struct gk20a_platform *platform = gk20a_get_platform(c->g->dev); + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct vm_gk20a *ch_vm = c->vm; + + gk20a_dbg_fn(""); + + if (ch_ctx->gr_ctx && ch_ctx->gr_ctx->gpu_va) { + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + gk20a_vm_free_va(ch_vm, ch_ctx->gr_ctx->gpu_va, + ch_ctx->gr_ctx->size, 0); + ch_ctx->gr_ctx->gpu_va = 0; + kfree(ch_ctx->gr_ctx); + } +} + +static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; + struct vm_gk20a *ch_vm = c->vm; + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + gk20a_dbg_fn(""); + + patch_ctx->size = 128 * 
sizeof(u32); + patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0); + if (!patch_ctx->gpu_va) + return -ENOMEM; + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + p->patch_ctx_va = patch_ctx->gpu_va; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0); + err = -ENOMEM; + } + + return err; +} + +static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) +{ + struct gk20a_platform *platform = gk20a_get_platform(c->g->dev); + struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; + struct vm_gk20a *ch_vm = c->vm; + + gk20a_dbg_fn(""); + + if (patch_ctx->gpu_va) { + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0); + patch_ctx->gpu_va = 0; + } +} + +static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c) +{ + gk20a_dbg_fn(""); + + vgpu_gr_unmap_global_ctx_buffers(c); + vgpu_gr_free_channel_patch_ctx(c); + if (!gk20a_is_channel_marked_as_tsg(c)) + vgpu_gr_free_channel_gr_ctx(c); + + /* zcull_ctx, pm_ctx */ + + memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); + + c->num_objects = 0; + c->first_init = false; +} + +static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, + struct nvhost_alloc_obj_ctx_args *args) +{ + struct gk20a *g = c->g; + struct fifo_gk20a *f = &g->fifo; + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct tsg_gk20a *tsg = NULL; + int err = 0; + + gk20a_dbg_fn(""); + + /* an address space needs to have been bound at this point.*/ + if (!gk20a_channel_as_bound(c)) { + gk20a_err(dev_from_gk20a(g), + "not bound to address space at time" + " of grctx allocation"); + return -EINVAL; + } + + if (!g->ops.gr.is_valid_class(g, args->class_num)) { + gk20a_err(dev_from_gk20a(g), + "invalid obj class 0x%x", args->class_num); + err = -EINVAL; + goto out; + } + c->obj_class = args->class_num; + + /* FIXME: add TSG support */ + if (gk20a_is_channel_marked_as_tsg(c)) + tsg = &f->tsg[c->tsgid]; + + /* allocate gr ctx buffer */ + if (!ch_ctx->gr_ctx) { + err = vgpu_gr_alloc_channel_gr_ctx(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to allocate gr ctx buffer"); + goto out; + } + } else { + /*TBD: needs to be more subtle about which is + * being allocated as some are allowed to be + * allocated along same channel */ + gk20a_err(dev_from_gk20a(g), + "too many classes alloc'd on same channel"); + err = -EINVAL; + goto out; + } + + /* commit gr ctx buffer */ + err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->gpu_va); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to commit gr ctx buffer"); + goto out; + } + + /* allocate patch buffer */ + if (ch_ctx->patch_ctx.pages == NULL) { + err = vgpu_gr_alloc_channel_patch_ctx(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to allocate patch buffer"); + goto out; + } + } + + /* map global buffer to channel gpu_va and commit */ + if (!ch_ctx->global_ctx_buffer_mapped) { + err = vgpu_gr_map_global_ctx_buffers(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to map global ctx buffer"); + goto out; + } + gr_gk20a_elpg_protected_call(g, + vgpu_gr_commit_global_ctx_buffers(g, c, true)); + } + + /* load 
golden image */ + if (!c->first_init) { + err = gr_gk20a_elpg_protected_call(g, + vgpu_gr_load_golden_ctx_image(g, c)); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to load golden ctx image"); + goto out; + } + c->first_init = true; + } + + c->num_objects++; + + gk20a_dbg_fn("done"); + return 0; +out: + /* 1. gr_ctx, patch_ctx and global ctx buffer mapping + can be reused so no need to release them. + 2. golden image load is a one time thing so if + they pass, no need to undo. */ + gk20a_err(dev_from_gk20a(g), "fail"); + return err; +} + +static int vgpu_gr_free_obj_ctx(struct channel_gk20a *c, + struct nvhost_free_obj_ctx_args *args) +{ + unsigned long timeout = gk20a_get_gr_idle_timeout(c->g); + + gk20a_dbg_fn(""); + + if (c->num_objects == 0) + return 0; + + c->num_objects--; + + if (c->num_objects == 0) { + c->first_init = false; + gk20a_disable_channel(c, + !c->has_timedout, + timeout); + } + + return 0; +} + +static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + + gk20a_dbg_fn(""); + + if (vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count)) + return -ENOMEM; + + if (vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT, + &gr->max_tpc_per_gpc_count)) + return -ENOMEM; + + if (vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT, + &gr->max_tpc_count)) + return -ENOMEM; + + g->ops.gr.bundle_cb_defaults(g); + g->ops.gr.cb_size_default(g); + g->ops.gr.calc_global_ctx_buffer_size(g); + return 0; +} + +static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, + struct channel_gk20a *c, u64 zcull_va, + u32 mode) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL; + msg.handle = platform->virt_handle; + p->handle = c->virt_ctx; + p->zcull_va = zcull_va; + p->mode = mode; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? 
-ENOMEM : 0; +} + +static int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, + struct gr_zcull_info *zcull_params) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO; + msg.handle = platform->virt_handle; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) + return -ENOMEM; + + zcull_params->width_align_pixels = p->width_align_pixels; + zcull_params->height_align_pixels = p->height_align_pixels; + zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots; + zcull_params->aliquot_total = p->aliquot_total; + zcull_params->region_byte_multiplier = p->region_byte_multiplier; + zcull_params->region_header_size = p->region_header_size; + zcull_params->subregion_header_size = p->subregion_header_size; + zcull_params->subregion_width_align_pixels = + p->subregion_width_align_pixels; + zcull_params->subregion_height_align_pixels = + p->subregion_height_align_pixels; + zcull_params->subregion_count = p->subregion_count; + + return 0; +} + +static void vgpu_remove_gr_support(struct gr_gk20a *gr) +{ + gk20a_dbg_fn(""); + + gk20a_allocator_destroy(&gr->comp_tags); +} + +static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + int err; + + gk20a_dbg_fn(""); + + if (gr->sw_ready) { + gk20a_dbg_fn("skip init"); + return 0; + } + + gr->g = g; + + err = vgpu_gr_init_gr_config(g, gr); + if (err) + goto clean_up; + + err = vgpu_gr_init_ctx_state(g, gr); + if (err) + goto clean_up; + + err = g->ops.ltc.init_comptags(g, gr); + if (err) + goto clean_up; + + err = vgpu_gr_alloc_global_ctx_buffers(g); + if (err) + goto clean_up; + + mutex_init(&gr->ctx_mutex); + + gr->remove_support = vgpu_remove_gr_support; + gr->sw_ready = true; + + gk20a_dbg_fn("done"); + return 0; + +clean_up: + gk20a_err(dev_from_gk20a(g), "fail"); + vgpu_remove_gr_support(gr); + return err; +} + +int vgpu_init_gr_support(struct gk20a *g) +{ + gk20a_dbg_fn(""); + + return vgpu_gr_init_gr_setup_sw(g); +} + +struct gr_isr_data { + u32 addr; + u32 data_lo; + u32 data_hi; + u32 curr_ctx; + u32 chid; + u32 offset; + u32 sub_chan; + u32 class_num; +}; + +static int vgpu_gr_handle_notify_pending(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data->chid]; + + gk20a_dbg_fn(""); + wake_up(&ch->notifier_wq); + return 0; +} + +int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) +{ + struct gr_isr_data isr_data; + + gk20a_dbg_fn(""); + + isr_data.chid = info->chid; + + if (info->type == TEGRA_VGPU_GR_INTR_NOTIFY) + vgpu_gr_handle_notify_pending(g, &isr_data); + + return 0; +} + +void vgpu_init_gr_ops(struct gpu_ops *gops) +{ + gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx; + gops->gr.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx; + gops->gr.free_obj_ctx = vgpu_gr_free_obj_ctx; + gops->gr.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull; + gops->gr.get_zcull_info = vgpu_gr_get_zcull_info; +} diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c new file mode 100644 index 00000000..ddff23b7 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c @@ -0,0 +1,55 @@ +/* + * Virtualized GPU L2 + * + * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include "vgpu/vgpu.h" + +static int vgpu_determine_L2_size_bytes(struct gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + u32 cache_size = 0; + + gk20a_dbg_fn(""); + + if (vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_L2_SIZE, &cache_size)) + dev_err(dev_from_gk20a(g), "unable to get L2 size"); + + return cache_size; +} + +static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + u32 max_comptag_lines = 0; + + gk20a_dbg_fn(""); + + vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_COMPTAG_LINES, &max_comptag_lines); + if (max_comptag_lines < 2) + return -ENXIO; + + gk20a_allocator_init(&gr->comp_tags, "comptag", + 1, /* start */ + max_comptag_lines - 1, /* length*/ + 1); /* align */ + return 0; +} + +void vgpu_init_ltc_ops(struct gpu_ops *gops) +{ + gops->ltc.determine_L2_size_bytes = vgpu_determine_L2_size_bytes; + gops->ltc.init_comptags = vgpu_ltc_init_comptags; +} diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c new file mode 100644 index 00000000..6ed1dece --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -0,0 +1,425 @@ +/* + * Virtualized GPU Memory Management + * + * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include "vgpu/vgpu.h" + +/* note: keep the page sizes sorted lowest to highest here */ +static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; +static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 }; + +static int vgpu_init_mm_setup_sw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + + gk20a_dbg_fn(""); + + if (mm->sw_ready) { + gk20a_dbg_fn("skip init"); + return 0; + } + + mm->g = g; + mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; + mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big]; + mm->pde_stride = mm->big_page_size << 10; + mm->pde_stride_shift = ilog2(mm->pde_stride); + BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */ + + /*TBD: make channel vm size configurable */ + mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; + + gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); + + mm->sw_ready = true; + + return 0; +} + +int vgpu_init_mm_support(struct gk20a *g) +{ + gk20a_dbg_fn(""); + + return vgpu_init_mm_setup_sw(g); +} + +static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, + u64 map_offset, + struct sg_table *sgt, + u64 buffer_offset, + u64 size, + int pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + int rw_flag, + bool clear_ctags) +{ + int err = 0; + struct device *d = dev_from_vm(vm); + struct gk20a *g = gk20a_from_vm(vm); + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_map_params *p = &msg.params.as_map; + u64 addr = gk20a_mm_iova_addr(sgt->sgl); + u8 prot; + + gk20a_dbg_fn(""); + + /* Allocate (or validate when map_offset != 0) the virtual address. */ + if (!map_offset) { + map_offset = gk20a_vm_alloc_va(vm, size, + pgsz_idx); + if (!map_offset) { + gk20a_err(d, "failed to allocate va space"); + err = -ENOMEM; + goto fail; + } + } + + if (rw_flag == gk20a_mem_flag_read_only) + prot = TEGRA_VGPU_MAP_PROT_READ_ONLY; + else if (rw_flag == gk20a_mem_flag_write_only) + prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY; + else + prot = TEGRA_VGPU_MAP_PROT_NONE; + + msg.cmd = TEGRA_VGPU_CMD_AS_MAP; + msg.handle = platform->virt_handle; + p->handle = vm->handle; + p->addr = addr; + p->gpu_va = map_offset; + p->size = size; + p->pgsz_idx = pgsz_idx; + p->iova = mapping ? 1 : 0; + p->kind = kind_v; + p->cacheable = + (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 
1 : 0; + p->prot = prot; + p->ctag_offset = ctag_offset; + p->clear_ctags = clear_ctags; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) + goto fail; + + vm->tlb_dirty = true; + return map_offset; +fail: + gk20a_err(d, "%s: failed with err=%d\n", __func__, err); + return 0; +} + +static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + int pgsz_idx, + bool va_allocated, + int rw_flag) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_map_params *p = &msg.params.as_map; + int err; + + gk20a_dbg_fn(""); + + if (va_allocated) { + err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx); + if (err) { + dev_err(dev_from_vm(vm), + "failed to free va"); + return; + } + } + + msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP; + msg.handle = platform->virt_handle; + p->handle = vm->handle; + p->gpu_va = vaddr; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) + dev_err(dev_from_vm(vm), + "failed to update gmmu ptes on unmap"); + + vm->tlb_dirty = true; +} + +static void vgpu_vm_remove_support(struct vm_gk20a *vm) +{ + struct gk20a *g = vm->mm->g; + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct mapped_buffer_node *mapped_buffer; + struct vm_reserved_va_node *va_node, *va_node_tmp; + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_share_params *p = &msg.params.as_share; + struct rb_node *node; + int err; + + gk20a_dbg_fn(""); + mutex_lock(&vm->update_gmmu_lock); + + /* TBD: add a flag here for the unmap code to recognize teardown + * and short-circuit any otherwise expensive operations. */ + + node = rb_first(&vm->mapped_buffers); + while (node) { + mapped_buffer = + container_of(node, struct mapped_buffer_node, node); + gk20a_vm_unmap_locked(mapped_buffer); + node = rb_first(&vm->mapped_buffers); + } + + /* destroy remaining reserved memory areas */ + list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list, + reserved_va_list) { + list_del(&va_node->reserved_va_list); + kfree(va_node); + } + + msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; + msg.handle = platform->virt_handle; + p->handle = vm->handle; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); + gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); + + mutex_unlock(&vm->update_gmmu_lock); + + /* release zero page if used */ + if (vm->zero_page_cpuva) + dma_free_coherent(&g->dev->dev, vm->mm->big_page_size, + vm->zero_page_cpuva, vm->zero_page_iova); + + /* vm is not used anymore. release it. */ + kfree(vm); +} + +u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(dev_from_gk20a(g)); + u64 addr = gk20a_mm_iova_addr((*sgt)->sgl); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_map_params *p = &msg.params.as_map; + int err; + + msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1; + msg.handle = platform->virt_handle; + p->addr = addr; + p->size = size; + p->iova = mapping ? 
1 : 0; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) + addr = 0; + else + addr = p->gpu_va; + + return addr; +} + +/* address space interfaces for the gk20a module */ +static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share) +{ + struct gk20a_as *as = as_share->as; + struct gk20a *g = gk20a_from_as(as); + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_share_params *p = &msg.params.as_share; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm; + u64 vma_size; + u32 num_pages, low_hole_pages; + char name[32]; + int err; + + gk20a_dbg_fn(""); + + vm = kzalloc(sizeof(*vm), GFP_KERNEL); + if (!vm) + return -ENOMEM; + + as_share->vm = vm; + + vm->mm = mm; + vm->as_share = as_share; + + vm->big_pages = true; + + vm->va_start = mm->pde_stride; /* create a one pde hole */ + vm->va_limit = mm->channel.size; /* note this means channel.size is + really just the max */ + + msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; + msg.handle = platform->virt_handle; + p->size = vm->va_limit; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) + return -ENOMEM; + + vm->handle = p->handle; + + /* low-half: alloc small pages */ + /* high-half: alloc big pages */ + vma_size = mm->channel.size >> 1; + + snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, + gmmu_page_sizes[gmmu_page_size_small]>>10); + num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]); + + /* num_pages above is without regard to the low-side hole. */ + low_hole_pages = (vm->va_start >> + gmmu_page_shifts[gmmu_page_size_small]); + + gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name, + low_hole_pages, /* start */ + num_pages - low_hole_pages, /* length */ + 1); /* align */ + + snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, + gmmu_page_sizes[gmmu_page_size_big]>>10); + + num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]); + gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name, + num_pages, /* start */ + num_pages, /* length */ + 1); /* align */ + + vm->mapped_buffers = RB_ROOT; + + mutex_init(&vm->update_gmmu_lock); + kref_init(&vm->ref); + INIT_LIST_HEAD(&vm->reserved_va_list); + + vm->enable_ctag = true; + + return 0; +} + +static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share, + struct channel_gk20a *ch) +{ + struct vm_gk20a *vm = as_share->vm; + struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share; + int err; + + gk20a_dbg_fn(""); + + ch->vm = vm; + msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE; + msg.handle = platform->virt_handle; + p->as_handle = vm->handle; + p->chan_handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + if (err || msg.ret) { + ch->vm = NULL; + err = -ENOMEM; + } + + return err; +} + +static void vgpu_cache_maint(u64 handle, u8 op) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT; + msg.handle = handle; + p->op = op; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} + +static int vgpu_mm_fb_flush(struct gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + + gk20a_dbg_fn(""); + + vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH); + return 0; +} + +static void vgpu_mm_l2_invalidate(struct 
gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + + gk20a_dbg_fn(""); + + vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV); +} + +static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + u8 op; + + gk20a_dbg_fn(""); + + if (invalidate) + op = TEGRA_VGPU_L2_MAINT_FLUSH_INV; + else + op = TEGRA_VGPU_L2_MAINT_FLUSH; + + vgpu_cache_maint(platform->virt_handle, op); +} + +static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate; + int err; + + gk20a_dbg_fn(""); + + /* No need to invalidate if tlb is clean */ + mutex_lock(&vm->update_gmmu_lock); + if (!vm->tlb_dirty) { + mutex_unlock(&vm->update_gmmu_lock); + return; + } + + msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE; + msg.handle = platform->virt_handle; + p->handle = vm->handle; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + vm->tlb_dirty = false; + mutex_unlock(&vm->update_gmmu_lock); +} + +void vgpu_init_mm_ops(struct gpu_ops *gops) +{ + gops->mm.gmmu_map = vgpu_locked_gmmu_map; + gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap; + gops->mm.vm_remove = vgpu_vm_remove_support; + gops->mm.vm_alloc_share = vgpu_vm_alloc_share; + gops->mm.vm_bind_channel = vgpu_vm_bind_channel; + gops->mm.fb_flush = vgpu_mm_fb_flush; + gops->mm.l2_invalidate = vgpu_mm_l2_invalidate; + gops->mm.l2_flush = vgpu_mm_l2_flush; + gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate; +} diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c new file mode 100644 index 00000000..cfe307ff --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -0,0 +1,416 @@ +/* + * Virtualized GPU + * + * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include +#include "vgpu/vgpu.h" +#include "gk20a/debug_gk20a.h" +#include "gk20a/hal_gk20a.h" +#include "gk20a/hw_mc_gk20a.h" + +static inline int vgpu_comm_init(struct platform_device *pdev) +{ + size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES }; + + return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3, + queue_sizes, TEGRA_VGPU_QUEUE_CMD, + ARRAY_SIZE(queue_sizes)); +} + +static inline void vgpu_comm_deinit(void) +{ + size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES }; + + tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD, + ARRAY_SIZE(queue_sizes)); +} + +int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in, + size_t size_out) +{ + void *handle; + size_t size = size_in; + void *data = msg; + int err; + + err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT, + tegra_gr_comm_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size); + if (!err) { + WARN_ON(size < size_out); + memcpy(msg, data, size_out); + tegra_gr_comm_release(handle); + } + + return err; +} + +static u64 vgpu_connect(void) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_connect_params *p = &msg.params.connect; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CONNECT; + p->module = TEGRA_VGPU_MODULE_GPU; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? 0 : p->handle; +} + +int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_attrib_params *p = &msg.params.attrib; + int err; + + msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE; + msg.handle = handle; + p->attrib = attrib; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + if (err || msg.ret) + return -1; + + *value = p->value; + return 0; +} + +static int vgpu_intr_thread(void *dev_id) +{ + struct gk20a *g = dev_id; + + while (true) { + struct tegra_vgpu_intr_msg *msg; + u32 sender; + void *handle; + size_t size; + int err; + + err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT, + TEGRA_VGPU_QUEUE_INTR, &handle, + (void **)&msg, &size, &sender); + if (WARN_ON(err)) + continue; + + if (msg->event == TEGRA_VGPU_EVENT_ABORT) { + tegra_gr_comm_release(handle); + break; + } + + if (msg->unit == TEGRA_VGPU_INTR_GR) + vgpu_gr_isr(g, &msg->info.gr_intr); + + tegra_gr_comm_release(handle); + } + + while (!kthread_should_stop()) + msleep(10); + return 0; +} + +static void vgpu_remove_support(struct platform_device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct tegra_vgpu_intr_msg msg; + int err; + + if (g->pmu.remove_support) + g->pmu.remove_support(&g->pmu); + + if (g->gr.remove_support) + g->gr.remove_support(&g->gr); + + if (g->fifo.remove_support) + g->fifo.remove_support(&g->fifo); + + if (g->mm.remove_support) + g->mm.remove_support(&g->mm); + + msg.event = TEGRA_VGPU_EVENT_ABORT; + err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT, + TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR, + &msg, sizeof(msg)); + WARN_ON(err); + kthread_stop(platform->intr_handler); + + /* free mappings to registers, etc*/ + + if (g->bar1) { + iounmap(g->bar1); + g->bar1 = 0; + } +} + +static int vgpu_init_support(struct platform_device *dev) +{ + struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, 0); + struct gk20a *g = get_gk20a(dev); + int err = 0; + + if (!r) { + dev_err(dev_from_gk20a(g), "faield to get gk20a bar1\n"); + err = -ENXIO; + goto fail; + } + + g->bar1 = devm_request_and_ioremap(&dev->dev, r); + if (!g->bar1) { + 
dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n"); + err = -ENXIO; + goto fail; + } + + mutex_init(&g->dbg_sessions_lock); + mutex_init(&g->client_lock); + + g->remove_support = vgpu_remove_support; + return 0; + + fail: + vgpu_remove_support(dev); + return err; +} + +int vgpu_pm_prepare_poweroff(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct gk20a *g = get_gk20a(pdev); + int ret = 0; + + gk20a_dbg_fn(""); + + if (!g->power_on) + return 0; + + ret = gk20a_channel_suspend(g); + if (ret) + return ret; + + g->power_on = false; + + return ret; +} + +static void vgpu_detect_chip(struct gk20a *g) +{ + struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics; + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + + u32 mc_boot_0_value; + + if (vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_PMC_BOOT_0, + &mc_boot_0_value)) { + gk20a_err(dev_from_gk20a(g), "failed to detect chip"); + return; + } + + gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) << + NVHOST_GPU_ARCHITECTURE_SHIFT; + gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value); + gpu->rev = + (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) | + mc_boot_0_minor_revision_v(mc_boot_0_value); + + gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", + g->gpu_characteristics.arch, + g->gpu_characteristics.impl, + g->gpu_characteristics.rev); +} + +static int vgpu_init_hal(struct gk20a *g) +{ + u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; + + switch (ver) { + case GK20A_GPUID_GK20A: + gk20a_dbg_info("gk20a detected"); + /* init gk20a ops then override with virt extensions */ + gk20a_init_hal(&g->ops); + vgpu_init_fifo_ops(&g->ops); + vgpu_init_gr_ops(&g->ops); + vgpu_init_ltc_ops(&g->ops); + vgpu_init_mm_ops(&g->ops); + break; + default: + gk20a_err(&g->dev->dev, "no support for %x", ver); + return -ENODEV; + } + + return 0; +} + +int vgpu_pm_finalize_poweron(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct gk20a *g = get_gk20a(pdev); + int err; + + gk20a_dbg_fn(""); + + if (g->power_on) + return 0; + + g->power_on = true; + + vgpu_detect_chip(g); + err = vgpu_init_hal(g); + if (err) + goto done; + + err = vgpu_init_mm_support(g); + if (err) { + gk20a_err(dev, "failed to init gk20a mm"); + goto done; + } + + err = vgpu_init_fifo_support(g); + if (err) { + gk20a_err(dev, "failed to init gk20a fifo"); + goto done; + } + + err = vgpu_init_gr_support(g); + if (err) { + gk20a_err(dev, "failed to init gk20a gr"); + goto done; + } + + err = gk20a_init_gpu_characteristics(g); + if (err) { + gk20a_err(dev, "failed to init gk20a gpu characteristics"); + goto done; + } + + gk20a_channel_resume(g); + +done: + return err; +} + +static int vgpu_pm_init(struct platform_device *dev) +{ + int err = 0; + + gk20a_dbg_fn(""); + + pm_runtime_enable(&dev->dev); + return err; +} + +int vgpu_probe(struct platform_device *dev) +{ + struct gk20a *gk20a; + int err; + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (!platform) { + dev_err(&dev->dev, "no platform data\n"); + return -ENODATA; + } + + gk20a_dbg_fn(""); + + gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); + if (!gk20a) { + dev_err(&dev->dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + + platform->g = gk20a; + gk20a->dev = dev; + + err = gk20a_user_init(dev); + if (err) + return err; + + vgpu_init_support(dev); + + init_rwsem(&gk20a->busy_lock); + + spin_lock_init(&gk20a->mc_enable_lock); + + /* Initialize the platform 
interface. */ + err = platform->probe(dev); + if (err) { + dev_err(&dev->dev, "platform probe failed"); + return err; + } + + err = vgpu_pm_init(dev); + if (err) { + dev_err(&dev->dev, "pm init failed"); + return err; + } + + if (platform->late_probe) { + err = platform->late_probe(dev); + if (err) { + dev_err(&dev->dev, "late probe failed"); + return err; + } + } + + err = vgpu_comm_init(dev); + if (err) { + dev_err(&dev->dev, "failed to init comm interface\n"); + return -ENOSYS; + } + + platform->virt_handle = vgpu_connect(); + if (!platform->virt_handle) { + dev_err(&dev->dev, "failed to connect to server node\n"); + vgpu_comm_deinit(); + return -ENOSYS; + } + + platform->intr_handler = kthread_run(vgpu_intr_thread, gk20a, "gk20a"); + if (IS_ERR(platform->intr_handler)) + return -ENOMEM; + + gk20a_debug_init(dev); + + /* Set DMA parameters to allow larger sgt lists */ + dev->dev.dma_parms = &gk20a->dma_parms; + dma_set_max_seg_size(&dev->dev, UINT_MAX); + + gk20a->gr_idle_timeout_default = + CONFIG_GK20A_DEFAULT_TIMEOUT; + gk20a->timeouts_enabled = true; + + gk20a_create_sysfs(dev); + gk20a_init_gr(gk20a); + + return 0; +} + +int vgpu_remove(struct platform_device *dev) +{ + struct gk20a *g = get_gk20a(dev); + gk20a_dbg_fn(""); + + if (g->remove_support) + g->remove_support(dev); + + vgpu_comm_deinit(); + gk20a_user_deinit(dev); + gk20a_get_platform(dev)->g = NULL; + kfree(g); + return 0; +} diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h new file mode 100644 index 00000000..445a1c90 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/vgpu.h @@ -0,0 +1,41 @@ +/* + * Virtualized GPU Interfaces + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VIRT_H_ +#define _VIRT_H_ + +#include +#include +#include "gk20a/gk20a.h" + +int vgpu_pm_prepare_poweroff(struct device *dev); +int vgpu_pm_finalize_poweron(struct device *dev); +int vgpu_probe(struct platform_device *dev); +int vgpu_remove(struct platform_device *dev); +u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size); +int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info); +void vgpu_init_fifo_ops(struct gpu_ops *gops); +void vgpu_init_gr_ops(struct gpu_ops *gops); +void vgpu_init_ltc_ops(struct gpu_ops *gops); +void vgpu_init_mm_ops(struct gpu_ops *gops); +int vgpu_init_mm_support(struct gk20a *g); +int vgpu_init_gr_support(struct gk20a *g); +int vgpu_init_fifo_support(struct gk20a *g); + +int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value); +int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in, + size_t size_out); + +#endif -- cgit v1.2.2
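
The core architectural move in this patch is visible in vgpu_init_hal(): the native gk20a HAL is installed first via gk20a_init_hal(), and the vgpu_init_fifo_ops()/vgpu_init_gr_ops()/vgpu_init_ltc_ops()/vgpu_init_mm_ops() calls then overwrite only the gpu_ops entries that must be forwarded to the virtualization server, leaving everything else on the native code paths. The standalone sketch below mirrors that layering with a toy ops table; the struct and function names are illustrative stand-ins, not the driver's real gpu_ops layout.

/*
 * Sketch of the HAL layering done by vgpu_init_hal(): install the
 * native ops table first, then override only the entries that must go
 * through the virtualization server.  The ops structure here is a toy;
 * the real struct gpu_ops lives in gk20a.h.
 */
#include <stdio.h>

struct sketch_gpu_ops {
	int (*fifo_preempt_channel)(int hw_chid);
	int (*mm_fb_flush)(void);
};

/* native (bare-metal) implementations */
static int native_preempt(int hw_chid)
{
	printf("native preempt of channel %d via registers\n", hw_chid);
	return 0;
}

static int native_fb_flush(void)
{
	printf("native FB flush via registers\n");
	return 0;
}

/* virtualized implementation: the real one would send a server RPC */
static int vgpu_preempt(int hw_chid)
{
	printf("vgpu preempt of channel %d via server RPC\n", hw_chid);
	return 0;
}

/* cf. gk20a_init_hal(): fill in every native op */
static void sketch_init_native_hal(struct sketch_gpu_ops *ops)
{
	ops->fifo_preempt_channel = native_preempt;
	ops->mm_fb_flush = native_fb_flush;
}

/* cf. vgpu_init_fifo_ops(): override only what must be remoted */
static void sketch_init_vgpu_fifo_ops(struct sketch_gpu_ops *ops)
{
	ops->fifo_preempt_channel = vgpu_preempt;
}

int main(void)
{
	struct sketch_gpu_ops ops;

	/* same order as vgpu_init_hal(): native first, then overrides */
	sketch_init_native_hal(&ops);
	sketch_init_vgpu_fifo_ops(&ops);

	ops.fifo_preempt_channel(3);  /* resolves to the vgpu variant */
	ops.mm_fb_flush();            /* still the native variant */
	return 0;
}

This ordering means a native op that has no virtualized counterpart keeps working unchanged in the virtual case until an override is added for it.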
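
Almost every vgpu_* routine added here follows the same remote-call shape: fill a struct tegra_vgpu_cmd_msg, set msg.handle to the platform's virt_handle, populate the per-command member of the params union, call vgpu_comm_sendrecv(), and treat (err || msg.ret) as failure, since both the transport and the server-side status must succeed. The sketch below distills that shape into a small standalone program; the struct layout, the command ID, and the echo-style transport are simplified stand-ins for the real definitions in the Tegra virtualization headers, which are outside this patch.

/*
 * Sketch of the command/response pattern used throughout this patch.
 * All types and constants here are simplified stand-ins for the real
 * definitions in the tegra_vgpu headers; only the calling convention
 * (fill msg, sendrecv, check err and msg.ret) mirrors the patch.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* stand-in for a real command ID (assumed, not from the patch) */
#define SKETCH_CMD_CHANNEL_COMMIT_GR_CTX 1

struct sketch_gr_ctx_params {      /* cf. tegra_vgpu_gr_ctx_params */
	uint64_t handle;           /* channel context handle on the server */
};

struct sketch_cmd_msg {            /* cf. tegra_vgpu_cmd_msg */
	uint32_t cmd;              /* which server operation to run */
	int      ret;              /* filled in by the server */
	uint64_t handle;           /* per-client virtualization handle */
	union {
		struct sketch_gr_ctx_params gr_ctx;
	} params;
};

/*
 * Stand-in transport: the real vgpu_comm_sendrecv() hands the message
 * to the virtualization server over tegra_gr_comm and copies the reply
 * back into the same buffer.  Here we just pretend the server said OK.
 */
static int sketch_comm_sendrecv(struct sketch_cmd_msg *msg,
				size_t size_in, size_t size_out)
{
	(void)size_in;
	(void)size_out;
	msg->ret = 0;              /* server-side status */
	return 0;                  /* transport status */
}

/* mirrors the shape of vgpu_gr_commit_inst() and friends */
static int sketch_commit_gr_ctx(uint64_t virt_handle, uint64_t chan_handle)
{
	struct sketch_cmd_msg msg;
	struct sketch_gr_ctx_params *p = &msg.params.gr_ctx;
	int err;

	memset(&msg, 0, sizeof(msg));
	msg.cmd = SKETCH_CMD_CHANNEL_COMMIT_GR_CTX;
	msg.handle = virt_handle;
	p->handle = chan_handle;
	err = sketch_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

	/* both the transport and the server reply must succeed */
	return (err || msg.ret) ? -1 : 0;
}

int main(void)
{
	int err = sketch_commit_gr_ctx(0x1234, 0x5678);

	printf("commit gr ctx: %s\n", err ? "failed" : "ok");
	return 0;
}

Because the reply is copied back into the same message buffer, callers that need data from the server read it straight out of the params union after the call, as vgpu_vm_alloc_share() does with p->handle and vgpu_gr_get_zcull_info() does with the zcull fields.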
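
The address-space split done in vgpu_init_mm_setup_sw() and vgpu_vm_alloc_share() is easiest to see as arithmetic: the channel VA range is halved, small (4K) pages get the lower half minus a one-PDE hole at the bottom, and big (128K) pages get the upper half, with pde_stride = big_page_size << 10. The sketch below only reproduces that arithmetic; the 128GB total is an assumed example value, since the constant behind NV_GMMU_VA_RANGE is not part of these hunks.

/*
 * Sketch of the VA layout computed for a vgpu address-space share:
 * small pages in the lower half (minus a one-PDE hole), big pages in
 * the upper half.  Page shifts and the pde_stride formula come from
 * the patch; the total channel size is an assumed example.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t small_page_shift = 12;                   /* 4K   */
	const uint32_t big_page_shift   = 17;                   /* 128K */
	const uint64_t big_page_size    = 1ULL << big_page_shift;
	const uint64_t pde_stride       = big_page_size << 10;  /* 128MB */
	const uint64_t channel_size     = 1ULL << 37;            /* assumed 128GB */

	/* low half: small pages, high half: big pages */
	const uint64_t vma_size = channel_size >> 1;
	const uint64_t va_start = pde_stride;                    /* one-PDE hole */

	uint64_t small_pages    = vma_size >> small_page_shift;
	uint64_t low_hole_pages = va_start >> small_page_shift;
	uint64_t big_pages      = vma_size >> big_page_shift;

	/* cf. the two gk20a_allocator_init() calls in vgpu_vm_alloc_share() */
	printf("small-page vma: start page %llu, length %llu pages\n",
	       (unsigned long long)low_hole_pages,
	       (unsigned long long)(small_pages - low_hole_pages));
	printf("big-page vma:   start page %llu, length %llu pages\n",
	       (unsigned long long)big_pages,
	       (unsigned long long)big_pages);
	return 0;
}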