diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2016-08-10 11:09:47 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-01 12:10:31 -0400 |
commit | 6a48f4b3350f933b171edd4fac4a6860e53c2d64 (patch) | |
tree | 2bdf546bfff5744f0c236acef02c2aa917e239cc /drivers/gpu/nvgpu/gk20a | |
parent | f79639f61858c377cf1f3facfc0ce631f787f0e6 (diff) |
gpu: nvgpu: clear vidmem buffers in worker
We clear buffers allocated in vidmem in buffer free path.
But to clear buffers, we need to submit CE jobs, and this
could cause issues/races if free is called from a critical
path
Hence solve this by moving buffer clear/free to a worker
gk20a_gmmu_free_attr_vid() will now just put mem_desc into
a list and schedule a worker
And worker thread will traverse the list and clear/free
the allocations
In struct gk20a_vidmem_buf, mem variable is statically
allocated. But since we delay free of mem, convert this
variable into a pointer and allocate it dynamically
Since we delay the free of vidmem memory, it is now possible
to face OOM conditions during allocations. Hence, while
allocating, block until we have sufficient memory
available, with an upper limit of 1 second
Jira DNVGPU-84
Change-Id: I7925590644afae50b6fc04c6e1e43bbaa1c220fd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1201346
(cherry picked from commit b4dec4a30de2431369d677acca00e420f8e581a5)
Reviewed-on: http://git-master/r/1210950
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 129 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 5 |
2 files changed, 109 insertions, 25 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 65157ccd..c9681861 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -53,6 +53,10 @@ | |||
53 | */ | 53 | */ |
54 | #define GK20A_FORCE_PRAMIN_DEFAULT false | 54 | #define GK20A_FORCE_PRAMIN_DEFAULT false |
55 | 55 | ||
56 | #if defined(CONFIG_GK20A_VIDMEM) | ||
57 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); | ||
58 | #endif | ||
59 | |||
56 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | 60 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) |
57 | { | 61 | { |
58 | void *cpu_va; | 62 | void *cpu_va; |
@@ -437,7 +441,7 @@ struct gk20a_dmabuf_priv { | |||
437 | 441 | ||
438 | struct gk20a_vidmem_buf { | 442 | struct gk20a_vidmem_buf { |
439 | struct gk20a *g; | 443 | struct gk20a *g; |
440 | struct mem_desc mem; | 444 | struct mem_desc *mem; |
441 | struct dma_buf *dmabuf; | 445 | struct dma_buf *dmabuf; |
442 | void *dmabuf_priv; | 446 | void *dmabuf_priv; |
443 | void (*dmabuf_priv_delete)(void *); | 447 | void (*dmabuf_priv_delete)(void *); |
@@ -882,6 +886,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
882 | mm->vidmem.bootstrap_base = bootstrap_base; | 886 | mm->vidmem.bootstrap_base = bootstrap_base; |
883 | mm->vidmem.bootstrap_size = bootstrap_size; | 887 | mm->vidmem.bootstrap_size = bootstrap_size; |
884 | 888 | ||
889 | INIT_WORK(&mm->vidmem_clear_mem_worker, gk20a_vidmem_clear_mem_worker); | ||
890 | INIT_LIST_HEAD(&mm->vidmem.clear_list_head); | ||
891 | mutex_init(&mm->vidmem.clear_list_mutex); | ||
892 | |||
885 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | 893 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); |
886 | 894 | ||
887 | #endif | 895 | #endif |
@@ -1988,7 +1996,7 @@ static struct sg_table *gk20a_vidbuf_map_dma_buf( | |||
1988 | { | 1996 | { |
1989 | struct gk20a_vidmem_buf *buf = attach->dmabuf->priv; | 1997 | struct gk20a_vidmem_buf *buf = attach->dmabuf->priv; |
1990 | 1998 | ||
1991 | return buf->mem.sgt; | 1999 | return buf->mem->sgt; |
1992 | } | 2000 | } |
1993 | 2001 | ||
1994 | static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, | 2002 | static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, |
@@ -2006,7 +2014,7 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf) | |||
2006 | if (buf->dmabuf_priv) | 2014 | if (buf->dmabuf_priv) |
2007 | buf->dmabuf_priv_delete(buf->dmabuf_priv); | 2015 | buf->dmabuf_priv_delete(buf->dmabuf_priv); |
2008 | 2016 | ||
2009 | gk20a_gmmu_free(buf->g, &buf->mem); | 2017 | gk20a_gmmu_free(buf->g, buf->mem); |
2010 | kfree(buf); | 2018 | kfree(buf); |
2011 | } | 2019 | } |
2012 | 2020 | ||
@@ -2065,12 +2073,12 @@ static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf) | |||
2065 | 2073 | ||
2066 | exp_info.priv = buf; | 2074 | exp_info.priv = buf; |
2067 | exp_info.ops = &gk20a_vidbuf_ops; | 2075 | exp_info.ops = &gk20a_vidbuf_ops; |
2068 | exp_info.size = buf->mem.size; | 2076 | exp_info.size = buf->mem->size; |
2069 | exp_info.flags = O_RDWR; | 2077 | exp_info.flags = O_RDWR; |
2070 | 2078 | ||
2071 | return dma_buf_export(&exp_info); | 2079 | return dma_buf_export(&exp_info); |
2072 | #else | 2080 | #else |
2073 | return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size, | 2081 | return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem->size, |
2074 | O_RDWR, NULL); | 2082 | O_RDWR, NULL); |
2075 | #endif | 2083 | #endif |
2076 | } | 2084 | } |
@@ -2112,10 +2120,14 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes) | |||
2112 | } | 2120 | } |
2113 | } | 2121 | } |
2114 | 2122 | ||
2115 | err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem); | 2123 | buf->mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL); |
2116 | if (err) | 2124 | if (!buf->mem) |
2117 | goto err_kfree; | 2125 | goto err_kfree; |
2118 | 2126 | ||
2127 | err = gk20a_gmmu_alloc_vid(g, bytes, buf->mem); | ||
2128 | if (err) | ||
2129 | goto err_memfree; | ||
2130 | |||
2119 | buf->dmabuf = gk20a_vidbuf_export(buf); | 2131 | buf->dmabuf = gk20a_vidbuf_export(buf); |
2120 | if (IS_ERR(buf->dmabuf)) { | 2132 | if (IS_ERR(buf->dmabuf)) { |
2121 | err = PTR_ERR(buf->dmabuf); | 2133 | err = PTR_ERR(buf->dmabuf); |
@@ -2135,7 +2147,9 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes) | |||
2135 | return fd; | 2147 | return fd; |
2136 | 2148 | ||
2137 | err_bfree: | 2149 | err_bfree: |
2138 | gk20a_gmmu_free(g, &buf->mem); | 2150 | gk20a_gmmu_free(g, buf->mem); |
2151 | err_memfree: | ||
2152 | kfree(buf->mem); | ||
2139 | err_kfree: | 2153 | err_kfree: |
2140 | kfree(buf); | 2154 | kfree(buf); |
2141 | return err; | 2155 | return err; |
@@ -2831,7 +2845,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem) | |||
2831 | return -EINVAL; | 2845 | return -EINVAL; |
2832 | 2846 | ||
2833 | alloc = (struct gk20a_page_alloc *) | 2847 | alloc = (struct gk20a_page_alloc *) |
2834 | g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | 2848 | sg_dma_address(mem->sgt->sgl); |
2835 | 2849 | ||
2836 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | 2850 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { |
2837 | if (gk20a_last_fence) | 2851 | if (gk20a_last_fence) |
@@ -2882,12 +2896,28 @@ int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, enum dma_attr attr, | |||
2882 | return gk20a_gmmu_alloc_attr_vid_at(g, attr, size, mem, 0); | 2896 | return gk20a_gmmu_alloc_attr_vid_at(g, attr, size, mem, 0); |
2883 | } | 2897 | } |
2884 | 2898 | ||
2899 | #if defined(CONFIG_GK20A_VIDMEM) | ||
2900 | static u64 __gk20a_gmmu_alloc(struct gk20a_allocator *allocator, dma_addr_t at, | ||
2901 | size_t size) | ||
2902 | { | ||
2903 | u64 addr = 0; | ||
2904 | |||
2905 | if (at) | ||
2906 | addr = gk20a_alloc_fixed(allocator, at, size); | ||
2907 | else | ||
2908 | addr = gk20a_alloc(allocator, size); | ||
2909 | |||
2910 | return addr; | ||
2911 | } | ||
2912 | #endif | ||
2913 | |||
2885 | int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | 2914 | int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, |
2886 | size_t size, struct mem_desc *mem, dma_addr_t at) | 2915 | size_t size, struct mem_desc *mem, dma_addr_t at) |
2887 | { | 2916 | { |
2888 | #if defined(CONFIG_GK20A_VIDMEM) | 2917 | #if defined(CONFIG_GK20A_VIDMEM) |
2889 | u64 addr; | 2918 | u64 addr; |
2890 | int err; | 2919 | int err; |
2920 | unsigned long end_jiffies = jiffies + msecs_to_jiffies(1000); | ||
2891 | struct gk20a_allocator *vidmem_alloc = g->mm.vidmem.cleared ? | 2921 | struct gk20a_allocator *vidmem_alloc = g->mm.vidmem.cleared ? |
2892 | &g->mm.vidmem.allocator : | 2922 | &g->mm.vidmem.allocator : |
2893 | &g->mm.vidmem.bootstrap_allocator; | 2923 | &g->mm.vidmem.bootstrap_allocator; |
@@ -2901,19 +2931,21 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2901 | * are not done anyway */ | 2931 | * are not done anyway */ |
2902 | WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING); | 2932 | WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING); |
2903 | 2933 | ||
2904 | if (at) { | 2934 | do { |
2905 | addr = gk20a_alloc_fixed(vidmem_alloc, at, size); | 2935 | addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size); |
2906 | if (!addr) | 2936 | if (!addr) /* Possible OOM */ |
2907 | return -ENOMEM; | 2937 | usleep_range(100, 300); |
2938 | else | ||
2939 | break; | ||
2940 | } while (time_before(jiffies, end_jiffies)); | ||
2908 | 2941 | ||
2909 | mem->fixed = true; | 2942 | if (!addr) |
2910 | } else { | 2943 | return -ENOMEM; |
2911 | addr = gk20a_alloc(vidmem_alloc, size); | ||
2912 | if (!addr) | ||
2913 | return -ENOMEM; | ||
2914 | 2944 | ||
2945 | if (at) | ||
2946 | mem->fixed = true; | ||
2947 | else | ||
2915 | mem->fixed = false; | 2948 | mem->fixed = false; |
2916 | } | ||
2917 | 2949 | ||
2918 | mem->sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); | 2950 | mem->sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); |
2919 | if (!mem->sgt) { | 2951 | if (!mem->sgt) { |
@@ -2931,6 +2963,8 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2931 | mem->size = size; | 2963 | mem->size = size; |
2932 | mem->aperture = APERTURE_VIDMEM; | 2964 | mem->aperture = APERTURE_VIDMEM; |
2933 | 2965 | ||
2966 | INIT_LIST_HEAD(&mem->clear_list_entry); | ||
2967 | |||
2934 | gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); | 2968 | gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); |
2935 | 2969 | ||
2936 | return 0; | 2970 | return 0; |
@@ -2949,11 +2983,18 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, | |||
2949 | struct mem_desc *mem) | 2983 | struct mem_desc *mem) |
2950 | { | 2984 | { |
2951 | #if defined(CONFIG_GK20A_VIDMEM) | 2985 | #if defined(CONFIG_GK20A_VIDMEM) |
2952 | gk20a_gmmu_clear_vidmem_mem(g, mem); | 2986 | bool was_empty; |
2953 | gk20a_free(&g->mm.vidmem.allocator, sg_dma_address(mem->sgt->sgl)); | 2987 | |
2954 | gk20a_free_sgtable(&mem->sgt); | 2988 | mutex_lock(&g->mm.vidmem.clear_list_mutex); |
2955 | mem->size = 0; | 2989 | was_empty = list_empty(&g->mm.vidmem.clear_list_head); |
2956 | mem->aperture = APERTURE_INVALID; | 2990 | list_add_tail(&mem->clear_list_entry, |
2991 | &g->mm.vidmem.clear_list_head); | ||
2992 | mutex_unlock(&g->mm.vidmem.clear_list_mutex); | ||
2993 | |||
2994 | if (was_empty) { | ||
2995 | cancel_work_sync(&g->mm.vidmem_clear_mem_worker); | ||
2996 | schedule_work(&g->mm.vidmem_clear_mem_worker); | ||
2997 | } | ||
2957 | #endif | 2998 | #endif |
2958 | } | 2999 | } |
2959 | 3000 | ||
@@ -2975,6 +3016,42 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | |||
2975 | return gk20a_gmmu_free_attr(g, 0, mem); | 3016 | return gk20a_gmmu_free_attr(g, 0, mem); |
2976 | } | 3017 | } |
2977 | 3018 | ||
3019 | #if defined(CONFIG_GK20A_VIDMEM) | ||
3020 | static struct mem_desc *get_pending_mem_desc(struct mm_gk20a *mm) | ||
3021 | { | ||
3022 | struct mem_desc *mem = NULL; | ||
3023 | |||
3024 | mutex_lock(&mm->vidmem.clear_list_mutex); | ||
3025 | mem = list_first_entry_or_null(&mm->vidmem.clear_list_head, | ||
3026 | struct mem_desc, clear_list_entry); | ||
3027 | if (mem) | ||
3028 | list_del_init(&mem->clear_list_entry); | ||
3029 | mutex_unlock(&mm->vidmem.clear_list_mutex); | ||
3030 | |||
3031 | return mem; | ||
3032 | } | ||
3033 | |||
3034 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) | ||
3035 | { | ||
3036 | struct mm_gk20a *mm = container_of(work, struct mm_gk20a, | ||
3037 | vidmem_clear_mem_worker); | ||
3038 | struct gk20a *g = mm->g; | ||
3039 | struct mem_desc *mem; | ||
3040 | |||
3041 | while ((mem = get_pending_mem_desc(mm)) != NULL) { | ||
3042 | gk20a_gmmu_clear_vidmem_mem(g, mem); | ||
3043 | gk20a_free(&g->mm.vidmem.allocator, | ||
3044 | sg_dma_address(mem->sgt->sgl)); | ||
3045 | gk20a_free_sgtable(&mem->sgt); | ||
3046 | |||
3047 | mem->size = 0; | ||
3048 | mem->aperture = APERTURE_INVALID; | ||
3049 | |||
3050 | kfree(mem); | ||
3051 | } | ||
3052 | } | ||
3053 | #endif | ||
3054 | |||
2978 | u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture, | 3055 | u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture, |
2979 | u32 sysmem_mask, u32 vidmem_mask) | 3056 | u32 sysmem_mask, u32 vidmem_mask) |
2980 | { | 3057 | { |
@@ -3544,7 +3621,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3544 | 3621 | ||
3545 | if (sgt) { | 3622 | if (sgt) { |
3546 | alloc = (struct gk20a_page_alloc *) | 3623 | alloc = (struct gk20a_page_alloc *) |
3547 | g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0); | 3624 | sg_dma_address(sgt->sgl); |
3548 | 3625 | ||
3549 | list_for_each_entry(chunk, &alloc->alloc_chunks, | 3626 | list_for_each_entry(chunk, &alloc->alloc_chunks, |
3550 | list_entry) { | 3627 | list_entry) { |
@@ -4918,6 +4995,8 @@ int gk20a_mm_suspend(struct gk20a *g) | |||
4918 | { | 4995 | { |
4919 | gk20a_dbg_fn(""); | 4996 | gk20a_dbg_fn(""); |
4920 | 4997 | ||
4998 | cancel_work_sync(&g->mm.vidmem_clear_mem_worker); | ||
4999 | |||
4921 | g->ops.mm.cbc_clean(g); | 5000 | g->ops.mm.cbc_clean(g); |
4922 | g->ops.mm.l2_flush(g, false); | 5001 | g->ops.mm.l2_flush(g, false); |
4923 | 5002 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index c6360955..54d3dfd0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -71,6 +71,7 @@ struct mem_desc { | |||
71 | size_t size; | 71 | size_t size; |
72 | u64 gpu_va; | 72 | u64 gpu_va; |
73 | bool fixed; /* vidmem only */ | 73 | bool fixed; /* vidmem only */ |
74 | struct list_head clear_list_entry; /* vidmem only */ | ||
74 | }; | 75 | }; |
75 | 76 | ||
76 | struct mem_desc_sub { | 77 | struct mem_desc_sub { |
@@ -414,7 +415,11 @@ struct mm_gk20a { | |||
414 | 415 | ||
415 | u32 ce_ctx_id; | 416 | u32 ce_ctx_id; |
416 | bool cleared; | 417 | bool cleared; |
418 | |||
419 | struct list_head clear_list_head; | ||
420 | struct mutex clear_list_mutex; | ||
417 | } vidmem; | 421 | } vidmem; |
422 | struct work_struct vidmem_clear_mem_worker; | ||
418 | }; | 423 | }; |
419 | 424 | ||
420 | int gk20a_mm_init(struct mm_gk20a *mm); | 425 | int gk20a_mm_init(struct mm_gk20a *mm); |