author    Terje Bergstrom <tbergstrom@nvidia.com>    2018-02-09 17:42:07 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com>    2018-02-15 00:43:43 -0500
commit    ec00a6c2db2b6e163c2bb1245584a2d009fa1252 (patch)
tree      7676cdb8840ccbbac287e31b2a5087d210854a8f /drivers
parent    1582bdb5eeff44a93f53987080a652910d51c3c4 (diff)
gpu: nvgpu: Use preallocated VPR buffer
To prevent deadlock while allocating VPR in nvgpu, allocate all the
needed VPR memory at probe time and use an internal allocator to hand
out space for VPR buffers.

Change-Id: I584b9a0f746d5d1dec021cdfbd6f26b4b92e4412
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1655324
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
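The "internal allocator" the commit describes is a simple bump allocator over
a region reserved once at probe: a used offset advances by the page-aligned
request size until the preallocated region runs out. The following is a
minimal standalone C sketch of that pattern, not the nvgpu code itself; the
field names mirror the patch below, but the PAGE_SIZE definition, addresses,
and main() harness are invented for illustration.

	/* Sketch of the bump-allocator pattern introduced by this commit. */
	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

	struct secure_buffer {
		uint64_t phys;	/* base address of the preallocated region */
		size_t size;	/* total size reserved at probe time */
		size_t used;	/* bump pointer: bytes handed out so far */
	};

	/* Hand out the next page-aligned slice, or fail if the region is full. */
	static int secure_alloc(struct secure_buffer *buf, size_t size,
				uint64_t *phys)
	{
		size_t aligned_size = PAGE_ALIGN(size);

		if (buf->used + aligned_size > buf->size)
			return -1;	/* out of preallocated memory */

		*phys = buf->phys + buf->used;
		buf->used += aligned_size;
		return 0;
	}

	int main(void)
	{
		/* Pretend probe reserved 335872 bytes at 0x80000000. */
		struct secure_buffer buf = {
			.phys = 0x80000000, .size = 335872, .used = 0,
		};
		uint64_t phys;

		if (secure_alloc(&buf, 6000, &phys) == 0)
			printf("slice at %#llx, %zu/%zu used\n",
			       (unsigned long long)phys, buf.used, buf.size);
		return 0;
	}

Note that the sketch, like the patch, has no per-slice free: VPR buffers handed
out this way live until the whole region is released at teardown, which is why
the patch below sets desc->destroy to NULL.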
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gk20a.h        |   5
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c  | 107
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h  |   6
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c  |  10
-rw-r--r--  drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c  |   9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c                     |  21
6 files changed, 77 insertions, 81 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a.h
index 6994677e..ba4880af 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/common/linux/platform_gk20a.h
@@ -32,7 +32,8 @@ struct gk20a_scale_profile;
 struct secure_page_buffer {
 	void (*destroy)(struct gk20a *, struct secure_page_buffer *);
 	size_t size;
-	u64 iova;
+	dma_addr_t phys;
+	size_t used;
 };
 
 struct gk20a_platform {
@@ -148,6 +149,8 @@ struct gk20a_platform {
 	/* Powerdown platform dependencies */
 	void (*idle)(struct device *dev);
 
+	/* Preallocated VPR buffer for kernel */
+	size_t secure_buffer_size;
 	struct secure_page_buffer secure_buffer;
 
 	/* Device is going to be suspended */
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
index 127a8ce9..219dcd40 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
@@ -103,103 +103,61 @@ static void gk20a_tegra_secure_page_destroy(struct gk20a *g,
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
 	dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
-			(void *)(uintptr_t)secure_buffer->iova,
-			secure_buffer->iova, __DMA_ATTR(attrs));
+			(void *)(uintptr_t)secure_buffer->phys,
+			secure_buffer->phys, __DMA_ATTR(attrs));
 
 	secure_buffer->destroy = NULL;
 }
 
-int gk20a_tegra_secure_page_alloc(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-	struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
-	DEFINE_DMA_ATTRS(attrs);
-	dma_addr_t iova;
-	size_t size = PAGE_SIZE;
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
-		return -EINVAL;
-
-	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
-	(void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
-			      GFP_KERNEL, __DMA_ATTR(attrs));
-	if (dma_mapping_error(&tegra_vpr_dev, iova))
-		return -ENOMEM;
-
-	secure_buffer->size = size;
-	secure_buffer->iova = iova;
-	secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
-
-	return 0;
-}
-
-static void gk20a_tegra_secure_destroy(struct gk20a *g,
-				       struct gr_ctx_buffer_desc *desc)
-{
-	DEFINE_DMA_ATTRS(attrs);
-
-	if (desc->mem.priv.sgt) {
-		u64 pa = nvgpu_mem_get_phys_addr(g, &desc->mem);
-
-		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
-		dma_free_attrs(&tegra_vpr_dev, desc->mem.size,
-				(void *)(uintptr_t)pa,
-				pa, __DMA_ATTR(attrs));
-		nvgpu_free_sgtable(g, &desc->mem.priv.sgt);
-		desc->mem.priv.sgt = NULL;
-	}
-}
-
 static int gk20a_tegra_secure_alloc(struct gk20a *g,
 				    struct gr_ctx_buffer_desc *desc,
 				    size_t size)
 {
 	struct device *dev = dev_from_gk20a(g);
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	DEFINE_DMA_ATTRS(attrs);
-	dma_addr_t iova;
+	struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
+	dma_addr_t phys;
 	struct sg_table *sgt;
 	struct page *page;
 	int err = 0;
+	size_t aligned_size = PAGE_ALIGN(size);
 
-	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
-	(void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
-			      GFP_KERNEL, __DMA_ATTR(attrs));
-	if (dma_mapping_error(&tegra_vpr_dev, iova))
+	/* We ran out of preallocated memory */
+	if (secure_buffer->used + aligned_size > secure_buffer->size) {
+		nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used",
+				size, secure_buffer->used, secure_buffer->size);
 		return -ENOMEM;
+	}
+
+	phys = secure_buffer->phys + secure_buffer->used;
 
 	sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt));
 	if (!sgt) {
 		nvgpu_err(platform->g, "failed to allocate memory");
-		goto fail;
+		return -ENOMEM;
 	}
 	err = sg_alloc_table(sgt, 1, GFP_KERNEL);
 	if (err) {
 		nvgpu_err(platform->g, "failed to allocate sg_table");
 		goto fail_sgt;
 	}
-	page = phys_to_page(iova);
+	page = phys_to_page(phys);
 	sg_set_page(sgt->sgl, page, size, 0);
 	/* This bypasses SMMU for VPR during gmmu_map. */
 	sg_dma_address(sgt->sgl) = 0;
 
-	desc->destroy = gk20a_tegra_secure_destroy;
+	desc->destroy = NULL;
 
 	desc->mem.priv.sgt = sgt;
 	desc->mem.size = size;
 	desc->mem.aperture = APERTURE_SYSMEM;
 
-	if (platform->secure_buffer.destroy)
-		platform->secure_buffer.destroy(g, &platform->secure_buffer);
+	secure_buffer->used += aligned_size;
 
 	return err;
 
 fail_sgt:
 	nvgpu_kfree(platform->g, sgt);
-fail:
-	dma_free_attrs(&tegra_vpr_dev, desc->mem.size,
-			(void *)(uintptr_t)iova, iova, __DMA_ATTR(attrs));
 	return err;
 }
 
@@ -664,10 +622,32 @@ void gk20a_tegra_idle(struct device *dev)
 #endif
 }
 
-void gk20a_tegra_init_secure_alloc(struct gk20a *g)
+int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
 {
+	struct gk20a *g = platform->g;
+	struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
+	DEFINE_DMA_ATTRS(attrs);
+	dma_addr_t iova;
+
+	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
+		return 0;
+
+	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
+	(void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,
+			      GFP_KERNEL, __DMA_ATTR(attrs));
+	/* Some platforms disable VPR. In that case VPR allocations always
+	 * fail. Just disable VPR usage in nvgpu in that case. */
+	if (dma_mapping_error(&tegra_vpr_dev, iova))
+		return 0;
+
+	secure_buffer->size = platform->secure_buffer_size;
+	secure_buffer->phys = iova;
+	secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
+
 	g->ops.secure_alloc = gk20a_tegra_secure_alloc;
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true);
+
+	return 0;
 }
 
 #ifdef CONFIG_COMMON_CLK
@@ -836,7 +816,9 @@ static int gk20a_tegra_probe(struct device *dev)
 
 	gk20a_tegra_get_clocks(dev);
 	nvgpu_linux_init_clk_support(platform->g);
-	gk20a_tegra_init_secure_alloc(platform->g);
+	ret = gk20a_tegra_init_secure_alloc(platform);
+	if (ret)
+		return ret;
 
 	if (platform->clk_register) {
 		ret = platform->clk_register(platform->g);
@@ -851,9 +833,6 @@ static int gk20a_tegra_probe(struct device *dev)
 
 static int gk20a_tegra_late_probe(struct device *dev)
 {
-	/* Cause early VPR resize */
-	gk20a_tegra_secure_page_alloc(dev);
-
 	return 0;
 }
 
@@ -974,4 +953,6 @@ struct gk20a_platform gm20b_tegra_platform = {
 	.soc_name = "tegra21x",
 
 	.unified_memory = true,
+
+	.secure_buffer_size = 335872,
 };
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h
index 1aa7c1e3..f7d50406 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h
+++ b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h
@@ -16,10 +16,8 @@
 #ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_
 #define _NVGPU_PLATFORM_GK20A_TEGRA_H_
 
-struct device;
-struct gk20a;
+struct gk20a_platform;
 
-void gk20a_tegra_init_secure_alloc(struct gk20a *g);
-int gk20a_tegra_secure_page_alloc(struct device *dev);
+int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform);
 
 #endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
index 0b0ebeeb..2bca2bd5 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
@@ -137,6 +137,10 @@ static int gp10b_tegra_probe(struct device *dev)
 		return ret;
 #endif
 
+	ret = gk20a_tegra_init_secure_alloc(platform);
+	if (ret)
+		return ret;
+
 	platform->disable_bigpage = !device_is_iommuable(dev);
 
 	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
@@ -149,16 +153,12 @@ static int gp10b_tegra_probe(struct device *dev)
 
 	gp10b_tegra_get_clocks(dev);
 	nvgpu_linux_init_clk_support(platform->g);
-	gk20a_tegra_init_secure_alloc(platform->g);
 
 	return 0;
 }
 
 static int gp10b_tegra_late_probe(struct device *dev)
 {
-	/* Cause early VPR resize */
-	gk20a_tegra_secure_page_alloc(dev);
-
 	return 0;
 }
 
@@ -422,6 +422,8 @@ struct gk20a_platform gp10b_tegra_platform = {
 	.unified_memory = true,
 
 	.ltc_streamid = TEGRA_SID_GPUB,
+
+	.secure_buffer_size = 401408,
 };
 
 
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
index 40c75164..ad56167a 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c
@@ -81,6 +81,10 @@ static int gv11b_tegra_probe(struct device *dev)
 	g->has_syncpoints = false;
 #endif
 
+	err = gk20a_tegra_init_secure_alloc(platform);
+	if (err)
+		return err;
+
 	platform->disable_bigpage = !device_is_iommuable(dev);
 
 	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
@@ -93,15 +97,12 @@ static int gv11b_tegra_probe(struct device *dev)
 
 	gp10b_tegra_get_clocks(dev);
 	nvgpu_linux_init_clk_support(platform->g);
-	gk20a_tegra_init_secure_alloc(platform->g);
 
 	return 0;
 }
 
 static int gv11b_tegra_late_probe(struct device *dev)
 {
-	/* Cause early VPR resize */
-	gk20a_tegra_secure_page_alloc(dev);
 	return 0;
 }
 
@@ -263,6 +264,8 @@ struct gk20a_platform gv11b_tegra_platform = {
 
 	.reset_assert = gp10b_tegra_reset_assert,
 	.reset_deassert = gp10b_tegra_reset_deassert,
+
+	.secure_buffer_size = 667648,
 };
 
 static struct device_attribute *dev_attr_sm_l1_tag_ecc_corrected_err_count_array;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index fb02bb81..0e21f749 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2532,10 +2532,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
 	if (err)
 		goto clean_up;
 
-	if (g->ops.secure_alloc)
-		g->ops.secure_alloc(g,
+	if (g->ops.secure_alloc) {
+		err = g->ops.secure_alloc(g,
 			       &gr->global_ctx_buffer[CIRCULAR_VPR],
 			       cb_buffer_size);
+		if (err)
+			goto clean_up;
+	}
 
 	gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
 
@@ -2544,10 +2547,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
 	if (err)
 		goto clean_up;
 
-	if (g->ops.secure_alloc)
-		g->ops.secure_alloc(g,
+	if (g->ops.secure_alloc) {
+		err = g->ops.secure_alloc(g,
 			       &gr->global_ctx_buffer[PAGEPOOL_VPR],
 			       pagepool_buffer_size);
+		if (err)
+			goto clean_up;
+	}
 
 	gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
 
@@ -2556,10 +2562,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
 	if (err)
 		goto clean_up;
 
-	if (g->ops.secure_alloc)
-		g->ops.secure_alloc(g,
+	if (g->ops.secure_alloc) {
+		err = g->ops.secure_alloc(g,
 			       &gr->global_ctx_buffer[ATTRIBUTE_VPR],
 			       attr_buffer_size);
+		if (err)
+			goto clean_up;
+	}
 
 	gk20a_dbg_info("golden_image_size : %d",
 		       gr->ctx_vars.golden_image_size);