From b42fb7ba26b565f93118fbdd9e17b42ee6144c5e Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Tue, 14 Nov 2017 06:43:28 -0800
Subject: gpu: nvgpu: move vgpu code to linux

Most of the VGPU code is Linux-specific but lies in common code.
So until the VGPU code is properly abstracted and made OS-independent,
move all of the VGPU code to the Linux-specific directory.

Handle corresponding Makefile changes.

Update all #includes to reflect the new paths.

Add GPL license to newly added Linux files.

Jira NVGPU-387

Change-Id: Ic133e4c80e570bcc273f0dacf45283fefd678923
Signed-off-by: Deepak Nibade
Reviewed-on: https://git-master.nvidia.com/r/1599472
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c | 776 +++++++++++++++++++++++++++++
 1 file changed, 776 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c

(limited to 'drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c')

diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c
new file mode 100644
index 00000000..7768b21d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.c
@@ -0,0 +1,776 @@
+/*
+ * Virtualized GPU
+ *
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "vgpu.h"
+#include "fecs_trace_vgpu.h"
+#include "clk_vgpu.h"
+#include "gk20a/tsg_gk20a.h"
+#include "gk20a/channel_gk20a.h"
+#include "gm20b/hal_gm20b.h"
+
+#include "common/linux/module.h"
+#include "common/linux/os_linux.h"
+#include "common/linux/ioctl.h"
+#include "common/linux/scale.h"
+#include "common/linux/driver_common.h"
+
+#ifdef CONFIG_TEGRA_19x_GPU
+#include "common/linux/vgpu/vgpu_t19x.h"
+#include
+#endif
+
+#include
+
+static inline int vgpu_comm_init(struct platform_device *pdev)
+{
+	size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
+
+	return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3,
+				queue_sizes, TEGRA_VGPU_QUEUE_CMD,
+				ARRAY_SIZE(queue_sizes));
+}
+
+static inline void vgpu_comm_deinit(void)
+{
+	size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
+
+	tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD,
+			ARRAY_SIZE(queue_sizes));
+}
+
+int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
+		size_t size_out)
+{
+	void *handle;
+	size_t size = size_in;
+	void *data = msg;
+	int err;
+
+	err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT,
+				tegra_gr_comm_get_server_vmid(),
+				TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size);
+	if (!err) {
+		WARN_ON(size < size_out);
+		memcpy(msg, data, size_out);
+		tegra_gr_comm_release(handle);
+	}
+
+	return err;
+}
+
+static u64 vgpu_connect(void)
+{
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_connect_params *p = &msg.params.connect;
+	int err;
+
+	msg.cmd = TEGRA_VGPU_CMD_CONNECT;
+	p->module = TEGRA_VGPU_MODULE_GPU;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+	return (err || msg.ret) ? 0 : p->handle;
+}
+
+int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value)
+{
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_attrib_params *p = &msg.params.attrib;
+	int err;
+
+	msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE;
+	msg.handle = handle;
+	p->attrib = attrib;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+	if (err || msg.ret)
+		return -1;
+
+	*value = p->value;
+	return 0;
+}
+
+static void vgpu_handle_channel_event(struct gk20a *g,
+			struct tegra_vgpu_channel_event_info *info)
+{
+	if (info->id >= g->fifo.num_channels ||
+		info->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) {
+		nvgpu_err(g, "invalid channel event");
+		return;
+	}
+
+	if (info->is_tsg) {
+		struct tsg_gk20a *tsg = &g->fifo.tsg[info->id];
+
+		gk20a_tsg_event_id_post_event(tsg, info->event_id);
+	} else {
+		struct channel_gk20a *ch = &g->fifo.channel[info->id];
+
+		if (!gk20a_channel_get(ch)) {
+			nvgpu_err(g, "invalid channel %d for event %d",
+				(int)info->id, (int)info->event_id);
+			return;
+		}
+		gk20a_channel_event_id_post_event(ch, info->event_id);
+		gk20a_channel_put(ch);
+	}
+}
+
+
+static int vgpu_intr_thread(void *dev_id)
+{
+	struct gk20a *g = dev_id;
+	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
+
+	while (true) {
+		struct tegra_vgpu_intr_msg *msg;
+		u32 sender;
+		void *handle;
+		size_t size;
+		int err;
+
+		err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT,
+					TEGRA_VGPU_QUEUE_INTR, &handle,
+					(void **)&msg, &size, &sender);
+		if (err == -ETIME)
+			continue;
+		if (WARN_ON(err))
+			continue;
+
+		if (msg->event == TEGRA_VGPU_EVENT_ABORT) {
+			tegra_gr_comm_release(handle);
+			break;
+		}
+
+		switch (msg->event) {
+		case TEGRA_VGPU_EVENT_INTR:
+			if (msg->unit == TEGRA_VGPU_INTR_GR)
+				vgpu_gr_isr(g, &msg->info.gr_intr);
+			else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_GR)
+				vgpu_gr_nonstall_isr(g,
+					&msg->info.gr_nonstall_intr);
+			else if (msg->unit == TEGRA_VGPU_INTR_FIFO)
+				vgpu_fifo_isr(g, &msg->info.fifo_intr);
+			else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_FIFO)
+				vgpu_fifo_nonstall_isr(g,
+					&msg->info.fifo_nonstall_intr);
+			else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_CE2)
+				vgpu_ce2_nonstall_isr(g,
+					&msg->info.ce2_nonstall_intr);
+			break;
+		case TEGRA_VGPU_EVENT_FECS_TRACE:
+			vgpu_fecs_trace_data_update(g);
+			break;
+		case TEGRA_VGPU_EVENT_CHANNEL:
+			vgpu_handle_channel_event(g, &msg->info.channel_event);
+			break;
+		case TEGRA_VGPU_EVENT_SM_ESR:
+			vgpu_gr_handle_sm_esr_event(g, &msg->info.sm_esr);
+			break;
+		default:
+			nvgpu_err(g, "unknown event %u", msg->event);
+			break;
+		}
+
+		tegra_gr_comm_release(handle);
+	}
+
+	while (!nvgpu_thread_should_stop(&priv->intr_handler))
+		msleep(10);
+	return 0;
+}
+
+static void vgpu_remove_support(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct vgpu_priv_data *priv =
+		vgpu_get_priv_data_from_dev(dev_from_gk20a(g));
+	struct tegra_vgpu_intr_msg msg;
+	int err;
+
+	if (g->dbg_regops_tmp_buf)
+		nvgpu_kfree(g, g->dbg_regops_tmp_buf);
+
+	if (g->pmu.remove_support)
+		g->pmu.remove_support(&g->pmu);
+
+	if (g->gr.remove_support)
+		g->gr.remove_support(&g->gr);
+
+	if (g->fifo.remove_support)
+		g->fifo.remove_support(&g->fifo);
+
+	if (g->mm.remove_support)
+		g->mm.remove_support(&g->mm);
+
+	msg.event = TEGRA_VGPU_EVENT_ABORT;
+	err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT,
+				TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR,
+				&msg, sizeof(msg));
+	WARN_ON(err);
+	nvgpu_thread_stop(&priv->intr_handler);
+
+	/* free mappings to registers, etc*/
+
+	if (l->bar1) {
+		iounmap(l->bar1);
+		l->bar1 = NULL;
+	}
+}
+
+static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	nvgpu_mutex_init(&g->poweron_lock);
+	nvgpu_mutex_init(&g->poweroff_lock);
+	l->regs_saved = l->regs;
+	l->bar1_saved = l->bar1;
+
+	nvgpu_init_list_node(&g->pending_sema_waits);
+	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
+
+	g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
+	g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
+	g->has_syncpoints = platform->has_syncpoints;
+	g->ptimer_src_freq = platform->ptimer_src_freq;
+	g->can_railgate = platform->can_railgate_init;
+	g->railgate_delay = platform->railgate_delay_init;
+
+	__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
+			platform->unify_address_spaces);
+}
+
+static int vgpu_init_support(struct platform_device *pdev)
+{
+	struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	struct gk20a *g = get_gk20a(&pdev->dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	void __iomem *regs;
+	int err = 0;
+
+	if (!r) {
+		nvgpu_err(g, "failed to get gk20a bar1");
+		err = -ENXIO;
+		goto fail;
+	}
+
+	if (r->name && !strcmp(r->name, "/vgpu")) {
+		regs = devm_ioremap_resource(&pdev->dev, r);
+		if (IS_ERR(regs)) {
+			nvgpu_err(g, "failed to remap gk20a bar1");
+			err = PTR_ERR(regs);
+			goto fail;
+		}
+		l->bar1 = regs;
+		l->bar1_mem = r;
+	}
+
+	nvgpu_mutex_init(&g->dbg_sessions_lock);
+	nvgpu_mutex_init(&g->client_lock);
+
+	nvgpu_init_list_node(&g->profiler_objects);
+
+	g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
+	if (!g->dbg_regops_tmp_buf) {
+		nvgpu_err(g, "couldn't allocate regops tmp buf");
+		return -ENOMEM;
+	}
+	g->dbg_regops_tmp_buf_ops =
+		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
+
+	g->remove_support = vgpu_remove_support;
+	return 0;
+
+ fail:
+	vgpu_remove_support(g);
+	return err;
+}
+
+int vgpu_pm_prepare_poweroff(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	int ret = 0;
+
+	gk20a_dbg_fn("");
+
+	if (!g->power_on)
+		return 0;
+
+	ret = gk20a_channel_suspend(g);
+	if (ret)
+		return ret;
+
+	g->power_on = false;
+
+	return ret;
+}
+
+static void vgpu_detect_chip(struct gk20a *g)
+{
+	struct nvgpu_gpu_params *p = &g->params;
+	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
+
+	p->gpu_arch = priv->constants.arch;
+	p->gpu_impl = priv->constants.impl;
+	p->gpu_rev = priv->constants.rev;
+
+	gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
+			p->gpu_arch,
+			p->gpu_impl,
+			p->gpu_rev);
+}
+
+int vgpu_init_gpu_characteristics(struct gk20a *g)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gk20a_init_gpu_characteristics(g);
+	if (err)
+		return err;
+
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, false);
+
+	/* features vgpu does not support */
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, false);
+
+	return 0;
+}
+
+int vgpu_read_ptimer(struct gk20a *g, u64 *value)
+{
+	struct tegra_vgpu_cmd_msg msg = {0};
+	struct tegra_vgpu_read_ptimer_params *p = &msg.params.read_ptimer;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_READ_PTIMER;
+	msg.handle = vgpu_get_handle(g);
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	if (!err)
+		*value = p->time;
+	else
+		nvgpu_err(g, "vgpu read ptimer failed, err=%d", err);
+
+	return err;
+}
+
+int vgpu_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples)
+{
+	struct tegra_vgpu_cmd_msg msg = {0};
+	struct tegra_vgpu_get_timestamps_zipper_params *p =
+			&msg.params.get_timestamps_zipper;
+	int err;
+	u32 i;
+
+	gk20a_dbg_fn("");
+
+	if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
+		nvgpu_err(g, "count %u overflow", count);
+		return -EINVAL;
+	}
+
+	msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
+	msg.handle = vgpu_get_handle(g);
+	p->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
+	p->count = count;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	if (err) {
+		nvgpu_err(g, "vgpu get timestamps zipper failed, err=%d", err);
+		return err;
+	}
+
+	for (i = 0; i < count; i++) {
+		samples[i].cpu_timestamp = p->samples[i].cpu_timestamp;
+		samples[i].gpu_timestamp = p->samples[i].gpu_timestamp;
+	}
+
+	return err;
+}
+
+static int vgpu_init_hal(struct gk20a *g)
+{
+	u32 ver = g->params.gpu_arch + g->params.gpu_impl;
+	int err;
+
+	switch (ver) {
+	case GK20A_GPUID_GM20B:
+	case GK20A_GPUID_GM20B_B:
+		gk20a_dbg_info("gm20b detected");
+		err = vgpu_gm20b_init_hal(g);
+		break;
+	case NVGPU_GPUID_GP10B:
+		gk20a_dbg_info("gp10b detected");
+		err = vgpu_gp10b_init_hal(g);
+		break;
+#ifdef CONFIG_TEGRA_19x_GPU
+	case TEGRA_19x_GPUID:
+		err = vgpu_t19x_init_hal(g);
+		break;
+#endif
+	default:
+		nvgpu_err(g, "no support for %x", ver);
+		err = -ENODEV;
+		break;
+	}
+
+	return err;
+}
+
+int vgpu_pm_finalize_poweron(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (g->power_on)
+		return 0;
+
+	g->power_on = true;
+
+	vgpu_detect_chip(g);
+	err = vgpu_init_hal(g);
+	if (err)
+		goto done;
+
+	if (g->ops.ltc.init_fs_state)
+		g->ops.ltc.init_fs_state(g);
+
+	err = vgpu_init_mm_support(g);
+	if (err) {
+		nvgpu_err(g, "failed to init gk20a mm");
+		goto done;
+	}
+
+	err = vgpu_init_fifo_support(g);
+	if (err) {
+		nvgpu_err(g, "failed to init gk20a fifo");
+		goto done;
+	}
+
+	err = vgpu_init_gr_support(g);
+	if (err) {
+		nvgpu_err(g, "failed to init gk20a gr");
+		goto done;
+	}
+
+	err = g->ops.chip_init_gpu_characteristics(g);
+	if (err) {
+		nvgpu_err(g, "failed to init gk20a gpu characteristics");
+		goto done;
+	}
+
+	gk20a_ctxsw_trace_init(g);
+	gk20a_sched_ctrl_init(g);
+	gk20a_channel_resume(g);
+
+done:
+	return err;
+}
+
+static int vgpu_qos_notify(struct notifier_block *nb,
+			unsigned long n, void *data)
+{
+	struct gk20a_scale_profile *profile =
+			container_of(nb, struct gk20a_scale_profile,
+					qos_notify_block);
+	struct gk20a *g = get_gk20a(profile->dev);
+	u32 max_freq;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
+	err = vgpu_clk_cap_rate(profile->dev, max_freq);
+	if (err)
+		nvgpu_err(g, "%s failed, err=%d", __func__, err);
+
+	return NOTIFY_OK; /* need notify call further */
+}
+
+static int vgpu_pm_qos_init(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_scale_profile *profile = g->scale_profile;
+
+	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) {
+		if (!profile)
+			return -EINVAL;
+	} else {
+		profile = nvgpu_kzalloc(g, sizeof(*profile));
+		if (!profile)
+			return -ENOMEM;
+		g->scale_profile = profile;
+	}
+
+	profile->dev = dev;
+	profile->qos_notify_block.notifier_call = vgpu_qos_notify;
+	pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+				&profile->qos_notify_block);
+	return 0;
+}
+
+static void vgpu_pm_qos_remove(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+				&g->scale_profile->qos_notify_block);
+	nvgpu_kfree(g, g->scale_profile);
+	g->scale_profile = NULL;
+}
+
+static int vgpu_pm_init(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	unsigned long *freqs;
+	int num_freqs;
+	int err = 0;
+
+	gk20a_dbg_fn("");
+
+	if (nvgpu_platform_is_simulation(g))
+		return 0;
+
+	__pm_runtime_disable(dev, false);
+
+	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
+		gk20a_scale_init(dev);
+
+	if (l->devfreq) {
+		/* set min/max frequency based on frequency table */
+		err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs);
+		if (err)
+			return err;
+
+		if (num_freqs < 1)
+			return -EINVAL;
+
+		l->devfreq->min_freq = freqs[0];
+		l->devfreq->max_freq = freqs[num_freqs - 1];
+	}
+
+	err = vgpu_pm_qos_init(dev);
+	if (err)
+		return err;
+
+	return err;
+}
+
+static int vgpu_get_constants(struct gk20a *g)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_constants_params *p = &msg.params.constants;
+	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_GET_CONSTANTS;
+	msg.handle = vgpu_get_handle(g);
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+
+	if (unlikely(err)) {
+		nvgpu_err(g, "%s failed, err=%d", __func__, err);
+		return err;
+	}
+
+	if (unlikely(p->gpc_count > TEGRA_VGPU_MAX_GPC_COUNT ||
+		p->max_tpc_per_gpc_count > TEGRA_VGPU_MAX_TPC_COUNT_PER_GPC)) {
+		nvgpu_err(g, "gpc_count %d max_tpc_per_gpc %d overflow",
+			(int)p->gpc_count, (int)p->max_tpc_per_gpc_count);
+		return -EINVAL;
+	}
+
+	priv->constants = *p;
+	return 0;
+}
+
+int vgpu_probe(struct platform_device *pdev)
+{
+	struct nvgpu_os_linux *l;
+	struct gk20a *gk20a;
+	int err;
+	struct device *dev = &pdev->dev;
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct vgpu_priv_data *priv;
+
+	if (!platform) {
+		dev_err(dev, "no platform data\n");
+		return -ENODATA;
+	}
+
+	gk20a_dbg_fn("");
+
+	l = kzalloc(sizeof(*l), GFP_KERNEL);
+	if (!l) {
+		dev_err(dev, "couldn't allocate gk20a support");
+		return -ENOMEM;
+	}
+	gk20a = &l->g;
+	nvgpu_init_gk20a(gk20a);
+
+	nvgpu_kmem_init(gk20a);
+
+	err = nvgpu_init_enabled_flags(gk20a);
+	if (err) {
+		kfree(gk20a);
+		return err;
+	}
+
+	l->dev = dev;
+	if (tegra_platform_is_vdk())
+		__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
+
+	gk20a->is_virtual = true;
+
+	priv = nvgpu_kzalloc(gk20a, sizeof(*priv));
+	if (!priv) {
+		kfree(gk20a);
+		return -ENOMEM;
+	}
+
+	platform->g = gk20a;
+	platform->vgpu_priv = priv;
+
+	err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
+	if (err)
+		return err;
+
+	vgpu_init_support(pdev);
+
+	vgpu_init_vars(gk20a, platform);
+
+	init_rwsem(&l->busy_lock);
+
+	nvgpu_spinlock_init(&gk20a->mc_enable_lock);
+
+	gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
+
+	/* Initialize the platform interface. */
+	err = platform->probe(dev);
+	if (err) {
+		if (err == -EPROBE_DEFER)
+			dev_info(dev, "platform probe failed");
+		else
+			dev_err(dev, "platform probe failed");
+		return err;
+	}
+
+	if (platform->late_probe) {
+		err = platform->late_probe(dev);
+		if (err) {
+			dev_err(dev, "late probe failed");
+			return err;
+		}
+	}
+
+	err = vgpu_comm_init(pdev);
+	if (err) {
+		dev_err(dev, "failed to init comm interface\n");
+		return -ENOSYS;
+	}
+
+	priv->virt_handle = vgpu_connect();
+	if (!priv->virt_handle) {
+		dev_err(dev, "failed to connect to server node\n");
+		vgpu_comm_deinit();
+		return -ENOSYS;
+	}
+
+	err = vgpu_get_constants(gk20a);
+	if (err) {
+		vgpu_comm_deinit();
+		return err;
+	}
+
+	err = vgpu_pm_init(dev);
+	if (err) {
+		dev_err(dev, "pm init failed");
+		return err;
+	}
+
+	err = nvgpu_thread_create(&priv->intr_handler, gk20a,
+			vgpu_intr_thread, "gk20a");
+	if (err)
+		return err;
+
+	gk20a_debug_init(gk20a, "gpu.0");
+
+	/* Set DMA parameters to allow larger sgt lists */
+	dev->dma_parms = &l->dma_parms;
+	dma_set_max_seg_size(dev, UINT_MAX);
+
+	gk20a->gr_idle_timeout_default =
+			CONFIG_GK20A_DEFAULT_TIMEOUT;
+	gk20a->timeouts_enabled = true;
+
+	vgpu_create_sysfs(dev);
+	gk20a_init_gr(gk20a);
+
+	nvgpu_ref_init(&gk20a->refcount);
+
+	return 0;
+}
+
+int vgpu_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct gk20a *g = get_gk20a(dev);
+	gk20a_dbg_fn("");
+
+	vgpu_pm_qos_remove(dev);
+	if (g->remove_support)
+		g->remove_support(g);
+
+	vgpu_comm_deinit();
+	gk20a_sched_ctrl_cleanup(g);
+	gk20a_user_deinit(dev, &nvgpu_class);
+	vgpu_remove_sysfs(dev);
+	gk20a_get_platform(dev)->g = NULL;
+	gk20a_put(g);
+
+	return 0;
+}
--
cgit v1.2.2