author	Deepak Nibade <dnibade@nvidia.com>	2016-08-10 11:09:47 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-09-01 12:10:31 -0400
commit	6a48f4b3350f933b171edd4fac4a6860e53c2d64 (patch)
tree	2bdf546bfff5744f0c236acef02c2aa917e239cc /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent	f79639f61858c377cf1f3facfc0ce631f787f0e6 (diff)
gpu: nvgpu: clear vidmem buffers in worker
We clear buffers allocated in vidmem in the buffer free path. But clearing a buffer requires submitting CE jobs, and this could cause issues/races if the free is called from a critical path. Hence solve this by moving the buffer clear/free to a worker: gk20a_gmmu_free_attr_vid() now just puts the mem_desc onto a list and schedules a worker, and the worker thread traverses the list and clears/frees the allocations.

In struct gk20a_vidmem_buf, the mem member was statically allocated. Since the free of mem is now delayed, convert it into a pointer and allocate it dynamically.

Since the free of vidmem memory is delayed, it is now possible to hit OOM conditions during allocation. Hence, while allocating, block until sufficient memory is available, with an upper limit of 1 second.

Jira DNVGPU-84

Change-Id: I7925590644afae50b6fc04c6e1e43bbaa1c220fd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1201346
(cherry picked from commit b4dec4a30de2431369d677acca00e420f8e581a5)
Reviewed-on: http://git-master/r/1210950
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
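For illustration, the following is a minimal, self-contained sketch of the two mechanisms this patch introduces, written against generic kernel workqueue/list APIs only. All names in it (pending_buf, pending_list, deferred_free, clear_worker_fn, try_alloc, alloc_with_backoff) are hypothetical stand-ins, not the driver's symbols; the real code hangs the list, mutex and work item off struct mm_gk20a and operates on struct mem_desc, as the diff below shows.

/*
 * Sketch only: deferred clear/free via a worker, plus a bounded wait
 * on allocation. Hypothetical names throughout.
 */
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct pending_buf {
	struct list_head entry;
	/* allocation bookkeeping would live here */
};

static LIST_HEAD(pending_list);
static DEFINE_MUTEX(pending_lock);
static void clear_worker_fn(struct work_struct *work);
static DECLARE_WORK(clear_worker, clear_worker_fn);

/* Stand-in for the driver's allocator; returns 0 when no memory is free. */
static u64 try_alloc(size_t size)
{
	return 0;
}

/*
 * Free path: only queue the buffer and kick the worker. No CE jobs are
 * submitted here, so this is safe to call from a critical path.
 */
static void deferred_free(struct pending_buf *buf)
{
	bool was_empty;

	mutex_lock(&pending_lock);
	was_empty = list_empty(&pending_list);
	list_add_tail(&buf->entry, &pending_list);
	mutex_unlock(&pending_lock);

	/* Schedule only on the empty -> non-empty transition. */
	if (was_empty)
		schedule_work(&clear_worker);
}

/* Worker: drain the list, clearing and freeing each buffer in turn. */
static void clear_worker_fn(struct work_struct *work)
{
	struct pending_buf *buf;

	for (;;) {
		mutex_lock(&pending_lock);
		buf = list_first_entry_or_null(&pending_list,
				struct pending_buf, entry);
		if (buf)
			list_del_init(&buf->entry);
		mutex_unlock(&pending_lock);
		if (!buf)
			break;

		/* A worker may sleep, so submitting CE clear jobs is safe here. */
		kfree(buf);
	}
}

/* Allocation path: poll for up to one second before reporting OOM. */
static u64 alloc_with_backoff(size_t size)
{
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(1000);
	u64 addr;

	do {
		addr = try_alloc(size);
		if (addr)
			return addr;
		usleep_range(100, 300);	/* give the worker time to free memory */
	} while (time_before(jiffies, end_jiffies));

	return 0;	/* caller maps this to -ENOMEM */
}

Note the empty-to-non-empty transition check: the worker is scheduled only when the list was previously empty, and the allocation path sleeps in 100-300 us steps for at most one second before giving up with -ENOMEM.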
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	129
1 file changed, 104 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 65157ccd..c9681861 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -53,6 +53,10 @@
  */
 #define GK20A_FORCE_PRAMIN_DEFAULT false
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static void gk20a_vidmem_clear_mem_worker(struct work_struct *work);
+#endif
+
 int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
 {
 	void *cpu_va;
@@ -437,7 +441,7 @@ struct gk20a_dmabuf_priv {
 
 struct gk20a_vidmem_buf {
 	struct gk20a *g;
-	struct mem_desc mem;
+	struct mem_desc *mem;
 	struct dma_buf *dmabuf;
 	void *dmabuf_priv;
 	void (*dmabuf_priv_delete)(void *);
@@ -882,6 +886,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
 	mm->vidmem.bootstrap_base = bootstrap_base;
 	mm->vidmem.bootstrap_size = bootstrap_size;
 
+	INIT_WORK(&mm->vidmem_clear_mem_worker, gk20a_vidmem_clear_mem_worker);
+	INIT_LIST_HEAD(&mm->vidmem.clear_list_head);
+	mutex_init(&mm->vidmem.clear_list_mutex);
+
 	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
 
 #endif
@@ -1988,7 +1996,7 @@ static struct sg_table *gk20a_vidbuf_map_dma_buf(
 {
 	struct gk20a_vidmem_buf *buf = attach->dmabuf->priv;
 
-	return buf->mem.sgt;
+	return buf->mem->sgt;
 }
 
 static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
@@ -2006,7 +2014,7 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
 	if (buf->dmabuf_priv)
 		buf->dmabuf_priv_delete(buf->dmabuf_priv);
 
-	gk20a_gmmu_free(buf->g, &buf->mem);
+	gk20a_gmmu_free(buf->g, buf->mem);
 	kfree(buf);
 }
 
@@ -2065,12 +2073,12 @@ static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf)
 
 	exp_info.priv = buf;
 	exp_info.ops = &gk20a_vidbuf_ops;
-	exp_info.size = buf->mem.size;
+	exp_info.size = buf->mem->size;
 	exp_info.flags = O_RDWR;
 
 	return dma_buf_export(&exp_info);
 #else
-	return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size,
+	return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem->size,
 			O_RDWR, NULL);
 #endif
 }
@@ -2112,10 +2120,14 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
 		}
 	}
 
-	err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
-	if (err)
+	buf->mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL);
+	if (!buf->mem)
 		goto err_kfree;
 
+	err = gk20a_gmmu_alloc_vid(g, bytes, buf->mem);
+	if (err)
+		goto err_memfree;
+
 	buf->dmabuf = gk20a_vidbuf_export(buf);
 	if (IS_ERR(buf->dmabuf)) {
 		err = PTR_ERR(buf->dmabuf);
@@ -2135,7 +2147,9 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
 	return fd;
 
 err_bfree:
-	gk20a_gmmu_free(g, &buf->mem);
+	gk20a_gmmu_free(g, buf->mem);
+err_memfree:
+	kfree(buf->mem);
 err_kfree:
 	kfree(buf);
 	return err;
@@ -2831,7 +2845,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem)
 		return -EINVAL;
 
 	alloc = (struct gk20a_page_alloc *)
-			g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
+			sg_dma_address(mem->sgt->sgl);
 
 	list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
 		if (gk20a_last_fence)
@@ -2882,12 +2896,28 @@ int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, enum dma_attr attr,
 	return gk20a_gmmu_alloc_attr_vid_at(g, attr, size, mem, 0);
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static u64 __gk20a_gmmu_alloc(struct gk20a_allocator *allocator, dma_addr_t at,
+		size_t size)
+{
+	u64 addr = 0;
+
+	if (at)
+		addr = gk20a_alloc_fixed(allocator, at, size);
+	else
+		addr = gk20a_alloc(allocator, size);
+
+	return addr;
+}
+#endif
+
 int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 		size_t size, struct mem_desc *mem, dma_addr_t at)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
 	u64 addr;
 	int err;
+	unsigned long end_jiffies = jiffies + msecs_to_jiffies(1000);
 	struct gk20a_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
 		&g->mm.vidmem.allocator :
 		&g->mm.vidmem.bootstrap_allocator;
@@ -2901,19 +2931,21 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 	 * are not done anyway */
 	WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING);
 
-	if (at) {
-		addr = gk20a_alloc_fixed(vidmem_alloc, at, size);
-		if (!addr)
-			return -ENOMEM;
+	do {
+		addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size);
+		if (!addr) /* Possible OOM */
+			usleep_range(100, 300);
+		else
+			break;
+	} while (time_before(jiffies, end_jiffies));
 
-		mem->fixed = true;
-	} else {
-		addr = gk20a_alloc(vidmem_alloc, size);
-		if (!addr)
-			return -ENOMEM;
+	if (!addr)
+		return -ENOMEM;
 
+	if (at)
+		mem->fixed = true;
+	else
 		mem->fixed = false;
-	}
 
 	mem->sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
 	if (!mem->sgt) {
@@ -2931,6 +2963,8 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 	mem->size = size;
 	mem->aperture = APERTURE_VIDMEM;
 
+	INIT_LIST_HEAD(&mem->clear_list_entry);
+
 	gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
 
 	return 0;
@@ -2949,11 +2983,18 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
 		struct mem_desc *mem)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
-	gk20a_gmmu_clear_vidmem_mem(g, mem);
-	gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl));
-	gk20a_free_sgtable(&mem->sgt);
-	mem->size = 0;
-	mem->aperture = APERTURE_INVALID;
+	bool was_empty;
+
+	mutex_lock(&g->mm.vidmem.clear_list_mutex);
+	was_empty = list_empty(&g->mm.vidmem.clear_list_head);
+	list_add_tail(&mem->clear_list_entry,
+			&g->mm.vidmem.clear_list_head);
+	mutex_unlock(&g->mm.vidmem.clear_list_mutex);
+
+	if (was_empty) {
+		cancel_work_sync(&g->mm.vidmem_clear_mem_worker);
+		schedule_work(&g->mm.vidmem_clear_mem_worker);
+	}
 #endif
 }
 
@@ -2975,6 +3016,42 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
 	return gk20a_gmmu_free_attr(g, 0, mem);
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static struct mem_desc *get_pending_mem_desc(struct mm_gk20a *mm)
+{
+	struct mem_desc *mem = NULL;
+
+	mutex_lock(&mm->vidmem.clear_list_mutex);
+	mem = list_first_entry_or_null(&mm->vidmem.clear_list_head,
+			struct mem_desc, clear_list_entry);
+	if (mem)
+		list_del_init(&mem->clear_list_entry);
+	mutex_unlock(&mm->vidmem.clear_list_mutex);
+
+	return mem;
+}
+
+static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
+{
+	struct mm_gk20a *mm = container_of(work, struct mm_gk20a,
+			vidmem_clear_mem_worker);
+	struct gk20a *g = mm->g;
+	struct mem_desc *mem;
+
+	while ((mem = get_pending_mem_desc(mm)) != NULL) {
+		gk20a_gmmu_clear_vidmem_mem(g, mem);
+		gk20a_free(&g->mm.vidmem.allocator,
+				sg_dma_address(mem->sgt->sgl));
+		gk20a_free_sgtable(&mem->sgt);
+
+		mem->size = 0;
+		mem->aperture = APERTURE_INVALID;
+
+		kfree(mem);
+	}
+}
+#endif
+
 u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
 		u32 sysmem_mask, u32 vidmem_mask)
 {
@@ -3544,7 +3621,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 
 	if (sgt) {
 		alloc = (struct gk20a_page_alloc *)
-				g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
+				sg_dma_address(sgt->sgl);
 
 		list_for_each_entry(chunk, &alloc->alloc_chunks,
 				list_entry) {
@@ -4918,6 +4995,8 @@ int gk20a_mm_suspend(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
 
+	cancel_work_sync(&g->mm.vidmem_clear_mem_worker);
+
 	g->ops.mm.cbc_clean(g);
 	g->ops.mm.l2_flush(g, false);
 