From e0f2afe5eb43fb32490ccabd504879c3e3e54623 Mon Sep 17 00:00:00 2001 From: David Nieto Date: Mon, 13 Mar 2017 20:23:03 -0700 Subject: gpu: nvgpu: refactor teardown to support unbind This change refactors the teardown in remove to ensure that it is possible to unload the driver while leaving fds open. This is achieved by making sure that the SW state is kept alive till all fds are closed and by checking that subsequent calls to ioctls after the teardown fail. Normally, this would be achieved by calls into gk20a_busy(), but in kickoff we don't call into that to reduce latency, so we need to check the driver status directly, and also in some of the functions as we need to make sure the ioctl does not dereference the device or platform struct bug 200277762 JIRA: EVLR-1023 Change-Id: I163e47a08c29d4d5b3ab79f0eb531ef234f40bde Signed-off-by: David Nieto Reviewed-on: http://git-master/r/1320219 Reviewed-by: Terje Bergstrom Reviewed-by: svccoveritychecker Reviewed-by: Shreshtha Sahu Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 6 +++++ drivers/gpu/nvgpu/gk20a/ce2_gk20a.h | 3 +++ drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 22 ++++++++++++++--- drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 3 +-- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 24 +++++++++--------- drivers/gpu/nvgpu/gk20a/gk20a.c | 44 +++++++++++++++++++-------------- drivers/gpu/nvgpu/gk20a/gk20a.h | 7 ++++-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 21 ++++++---------- drivers/gpu/nvgpu/pci.c | 11 +++++---- drivers/gpu/nvgpu/vgpu/vgpu.c | 11 +++++---- 10 files changed, 91 insertions(+), 61 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 921ee6f8..5dfd2309 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -690,6 +690,12 @@ void gk20a_ce_delete_context(struct device *dev, u32 ce_ctx_id) { struct gk20a *g = gk20a_from_dev(dev); + gk20a_ce_delete_context_priv(g, 
ce_ctx_id); +} + +void gk20a_ce_delete_context_priv(struct gk20a *g, + u32 ce_ctx_id) +{ struct gk20a_ce_app *ce_app = &g->ce_app; struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index 5cdd233e..7ecf130f 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h @@ -144,9 +144,12 @@ int gk20a_ce_execute_ops(struct device *dev, struct gk20a_fence *gk20a_fence_in, u32 submit_flags, struct gk20a_fence **gk20a_fence_out); +void gk20a_ce_delete_context_priv(struct gk20a *g, + u32 ce_ctx_id); void gk20a_ce_delete_context(struct device *dev, u32 ce_ctx_id); + #ifdef CONFIG_DEBUG_FS /* CE app debugfs api */ void gk20a_ce_debugfs_init(struct device *dev); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 0249a1c6..d0d38f83 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -122,7 +122,8 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *ch) { - struct gk20a_platform *platform = gk20a_get_platform(f->g->dev); + struct gk20a_platform *platform; + struct gk20a *g = f->g; trace_gk20a_release_used_channel(ch->hw_chid); /* refcount is zero here and channel is in a freed/dead state */ @@ -132,10 +133,18 @@ static void free_channel(struct fifo_gk20a *f, f->used_channels--; nvgpu_mutex_release(&f->free_chs_mutex); - if (platform->aggressive_sync_destroy_thresh && + /* + * On teardown it is not possible to dereference platform, but ignoring + * this is fine then because no new channels would be created. 
+ */ + if (!g->driver_is_dying) { + platform = gk20a_get_platform(g->dev); + + if (platform->aggressive_sync_destroy_thresh && (f->used_channels < platform->aggressive_sync_destroy_thresh)) - platform->aggressive_sync_destroy = false; + platform->aggressive_sync_destroy = false; + } } int channel_gk20a_commit_va(struct channel_gk20a *c) @@ -3016,7 +3025,12 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, bool need_deferred_cleanup = false; struct nvgpu_gpfifo __user *user_gpfifo = args ? (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL; - struct gk20a_platform *platform = gk20a_get_platform(d); + struct gk20a_platform *platform; + + if (g->driver_is_dying) + return -ENODEV; + + platform = gk20a_get_platform(d); if (c->has_timedout) return -ETIMEDOUT; diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index c87226c8..1ae42337 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -1344,8 +1344,7 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct gk20a_ctrl_priv *priv = filp->private_data; - struct device *dev = priv->dev; - struct gk20a *g = get_gk20a(dev); + struct gk20a *g = priv->g; struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; struct nvgpu_gpu_zcull_get_info_args *get_info_args; struct nvgpu_gpu_zbc_set_table_args *set_table_args; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index db534318..165bcf46 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -665,7 +665,7 @@ static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_timeout_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; gk20a_dbg_fn("powergate mode = %d", args->enable); @@ -680,7 +680,7 @@ static void 
nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_timeout_args *args) { int status; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; nvgpu_mutex_acquire(&g->dbg_sessions_lock); status = g->timeouts_enabled; @@ -711,7 +711,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct gr_gk20a *gr = &g->gr; struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state; u32 sm_id; @@ -750,7 +750,7 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct gr_gk20a *gr = &g->gr; u32 sm_id; struct channel_gk20a *ch; @@ -781,7 +781,7 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct gr_gk20a *gr = &g->gr; u32 sm_id; struct channel_gk20a *ch; @@ -952,7 +952,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct dbg_session_gk20a *dbg_s = filp->private_data; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE]; int err = 0; @@ -1141,7 +1141,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, bool is_pg_disabled = false; struct device *dev = dbg_s->dev; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch; gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); @@ -1257,7 +1257,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, static int 
dbg_set_powergate(struct dbg_session_gk20a *dbg_s, u32 powermode) { int err = 0; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; /* This function must be called with g->dbg_sessions_lock held */ @@ -1360,7 +1360,7 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_powergate_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; gk20a_dbg_fn("%s powergate mode = %d", dev_name(dbg_s->dev), args->mode); @@ -1374,7 +1374,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch_gk20a; gk20a_dbg_fn("%s smpc ctxsw mode = %d", @@ -1416,7 +1416,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) { int err; - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch_gk20a; gk20a_dbg_fn("%s pm ctxsw mode = %d", @@ -1468,7 +1468,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) { - struct gk20a *g = get_gk20a(dbg_s->dev); + struct gk20a *g = dbg_s->g; struct channel_gk20a *ch; int err = 0, action = args->mode; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 2046c08e..694f0e93 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -334,26 +334,14 @@ static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) return g->ops.mc.isr_thread_stall(g); } -void gk20a_remove_support(struct device *dev) +void gk20a_remove_support(struct gk20a *g) { - struct gk20a *g = get_gk20a(dev); - #ifdef CONFIG_TEGRA_COMMON tegra_unregister_idle_unidle(); #endif if (g->dbg_regops_tmp_buf) kfree(g->dbg_regops_tmp_buf); - 
nvgpu_wait_for_deferred_interrupts(g); - - gk20a_channel_cancel_pending_sema_waits(g); - - if (g->nonstall_work_queue) { - cancel_work_sync(&g->nonstall_fn_work); - destroy_workqueue(g->nonstall_work_queue); - g->nonstall_work_queue = NULL; - } - if (g->pmu.remove_support) g->pmu.remove_support(&g->pmu); @@ -1251,6 +1239,11 @@ static int gk20a_probe(struct platform_device *dev) if (gk20a->irq_stall != gk20a->irq_nonstall) disable_irq(gk20a->irq_nonstall); + /* + * is_fmodel needs to be in gk20a struct for deferred teardown + */ + gk20a->is_fmodel = platform->is_fmodel; + err = gk20a_init_support(dev); if (err) return err; @@ -1297,11 +1290,6 @@ static int __exit gk20a_remove(struct platform_device *pdev) if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) gk20a_scale_exit(dev); - if (g->remove_support) - g->remove_support(dev); - - gk20a_ce_destroy(g); - #ifdef CONFIG_ARCH_TEGRA_18x_SOC nvgpu_clk_arb_cleanup_arbiter(g); #endif @@ -1390,7 +1378,21 @@ void gk20a_busy_noresume(struct device *dev) void gk20a_driver_start_unload(struct gk20a *g) { gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n"); + + down_write(&g->busy_lock); g->driver_is_dying = 1; + up_write(&g->busy_lock); + + gk20a_wait_for_idle(g->dev); + + nvgpu_wait_for_deferred_interrupts(g); + gk20a_channel_cancel_pending_sema_waits(g); + + if (g->nonstall_work_queue) { + cancel_work_sync(&g->nonstall_fn_work); + destroy_workqueue(g->nonstall_work_queue); + g->nonstall_work_queue = NULL; + } } int gk20a_wait_for_idle(struct device *dev) @@ -1859,6 +1861,12 @@ static void gk20a_free_cb(struct kref *refcount) struct gk20a, refcount); gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!"); + + gk20a_ce_destroy(g); + + if (g->remove_support) + g->remove_support(g); + kfree(g); } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 30f1b371..f4ca5649 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -877,6 +877,9 @@ struct gk20a { atomic_t nonstall_ops; 
struct work_struct nonstall_fn_work; struct workqueue_struct *nonstall_work_queue; + + bool is_fmodel; + struct kref refcount; struct resource *reg_mem; @@ -987,7 +990,7 @@ struct gk20a { bool global_profiler_reservation_held; int profiler_reservation_count; - void (*remove_support)(struct device *); + void (*remove_support)(struct gk20a *); u64 pg_ingating_time_us; u64 pg_ungating_time_us; @@ -1455,7 +1458,7 @@ extern struct class nvgpu_class; int gk20a_pm_init(struct device *dev); int gk20a_pm_finalize_poweron(struct device *dev); -void gk20a_remove_support(struct device *dev); +void gk20a_remove_support(struct gk20a *g); static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch) { diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 7b08387e..9e6dc74c 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -809,7 +809,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm) struct gk20a *g = gk20a_from_mm(mm); if (mm->vidmem.ce_ctx_id != (u32)~0) - gk20a_ce_delete_context(g->dev, mm->vidmem.ce_ctx_id); + gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); mm->vidmem.ce_ctx_id = (u32)~0; @@ -1220,11 +1220,10 @@ static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, u32 num_pages = 1 << order; u32 len = num_pages * PAGE_SIZE; int err; - struct gk20a_platform *platform = dev_get_drvdata(g->dev); gk20a_dbg_fn(""); - if (platform->is_fmodel) + if (g->is_fmodel) return alloc_gmmu_phys_pages(vm, order, entry); /* @@ -1250,7 +1249,6 @@ void free_gmmu_pages(struct vm_gk20a *vm, struct gk20a_mm_entry *entry) { struct gk20a *g = gk20a_from_vm(vm); - struct gk20a_platform *platform = dev_get_drvdata(g->dev); gk20a_dbg_fn(""); @@ -1260,7 +1258,7 @@ void free_gmmu_pages(struct vm_gk20a *vm, if (entry->woffset) /* fake shadow mem */ return; - if (platform->is_fmodel) { + if (g->is_fmodel) { free_gmmu_phys_pages(vm, entry); return; } @@ -1270,11 +1268,9 @@ void 
free_gmmu_pages(struct vm_gk20a *vm, int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) { - struct gk20a_platform *platform = dev_get_drvdata(g->dev); - gk20a_dbg_fn(""); - if (platform->is_fmodel) + if (g->is_fmodel) return map_gmmu_phys_pages(entry); if (IS_ENABLED(CONFIG_ARM64)) { @@ -1296,11 +1292,9 @@ int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) { - struct gk20a_platform *platform = dev_get_drvdata(g->dev); - gk20a_dbg_fn(""); - if (platform->is_fmodel) { + if (g->is_fmodel) { unmap_gmmu_phys_pages(entry); return; } @@ -4070,6 +4064,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) struct mapped_buffer_node *mapped_buffer; struct vm_reserved_va_node *va_node, *va_node_tmp; struct rb_node *node; + struct gk20a *g = vm->mm->g; gk20a_dbg_fn(""); @@ -4078,7 +4073,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) * pool involves unmapping a GMMU mapping which means aquiring the * update_gmmu_lock. */ - if (!gk20a_platform_has_syncpoints(gk20a_from_vm(vm)->dev)) { + if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) { if (vm->sema_pool) { nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); nvgpu_semaphore_pool_put(vm->sema_pool); @@ -4172,7 +4167,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) /* * Don't waste the memory on semaphores if we don't need them. 
*/ - if (gk20a_platform_has_syncpoints(g->dev)) + if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS) return 0; if (vm->sema_pool) diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index 114e9af7..a7899f7e 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c @@ -393,6 +393,11 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, } disable_irq(g->irq_stall); + /* + * is_fmodel needs to be in gk20a struct for deferred teardown + */ + g->is_fmodel = platform->is_fmodel; + err = nvgpu_pci_init_support(pdev); if (err) return err; @@ -426,7 +431,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) struct gk20a *g = get_gk20a(&pdev->dev); gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n"); - gk20a_driver_start_unload(g); if (g->irqs_enabled) disable_irq(g->irq_stall); @@ -445,7 +449,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) * Wait for the driver to finish up all the IOCTLs it's working on * before cleaning up the driver's data structures. 
*/ - gk20a_wait_for_idle(&pdev->dev); + gk20a_driver_start_unload(g); gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n"); #ifdef CONFIG_ARCH_TEGRA_18x_SOC @@ -455,9 +459,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) gk20a_user_deinit(g->dev, &nvgpu_pci_class); gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b"); - if (g->remove_support) - g->remove_support(g->dev); - debugfs_remove_recursive(platform->debugfs); debugfs_remove_recursive(platform->debugfs_alias); diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 72606952..df793be7 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -191,10 +191,9 @@ static int vgpu_intr_thread(void *dev_id) return 0; } -static void vgpu_remove_support(struct device *dev) +static void vgpu_remove_support(struct gk20a *g) { - struct gk20a *g = get_gk20a(dev); - struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(dev); + struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(g->dev); struct tegra_vgpu_intr_msg msg; int err; @@ -265,7 +264,7 @@ static int vgpu_init_support(struct platform_device *pdev) return 0; fail: - vgpu_remove_support(&pdev->dev); + vgpu_remove_support(g); return err; } @@ -571,6 +570,8 @@ int vgpu_probe(struct platform_device *pdev) platform->vgpu_priv = priv; gk20a->dev = dev; + gk20a->is_fmodel = platform->is_fmodel; + nvgpu_kmem_init(gk20a); err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); @@ -653,7 +654,7 @@ int vgpu_remove(struct platform_device *pdev) vgpu_pm_qos_remove(dev); if (g->remove_support) - g->remove_support(dev); + g->remove_support(g); vgpu_comm_deinit(); gk20a_sched_ctrl_cleanup(g); -- cgit v1.2.2