author     Deepak Nibade <dnibade@nvidia.com>                    2016-08-25 05:04:55 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2016-09-01 12:12:15 -0400
commit     f43231f7a5b60cd7ebd3068dde04eaac43361d02 (patch)
tree       b55007ae1352e13e109c9f55a686bf5fe8ba3cbf /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     737d6346307ad7e87c0195f6e2e5d52cf4c3dd3b (diff)
gpu: nvgpu: enable big page support for pci
While mapping the buffer, first check whether the buffer resides in vidmem;
if so, convert the allocation handle into its base address and then walk
through each chunk to decide the alignment.

Add a new API, gk20a_mm_get_align(), which returns the alignment based on
the scatterlist and aperture, and use this API to get the alignment during
mapping.

Enable big page support for pci by unsetting disable_bigpage.

Jira DNVGPU-97

Change-Id: I358dc98fac8103fdf9d2bde758e61b363fea9ae9
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1207673
(cherry picked from commit d14d42290eed4aa7a2dd2be25e8e996917a58e82)
Reviewed-on: http://git-master/r/1210959
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  |  72
1 files changed, 51 insertions(+), 21 deletions(-)
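The alignment rule the patch introduces can be tried outside the driver: for each
chunk, the largest alignment the chunk can support is the lowest set bit of
(base | length), and the buffer's overall alignment is the minimum of that value
across all chunks (the 1ULL << __ffs(...) / min() pattern in gk20a_mm_get_align()
below). The following is a minimal userspace sketch of that calculation only, not
driver code; the chunk values are made up for illustration.

/*
 * Standalone sketch of the per-chunk alignment rule used by
 * gk20a_mm_get_align(). Chunk base/length values are invented.
 */
#include <stdint.h>
#include <stdio.h>

struct chunk { uint64_t base, length; };

static uint64_t chunk_alignment(uint64_t base, uint64_t length)
{
        /* lowest set bit of (base | length); equals 1ULL << __ffs(x) in the kernel */
        uint64_t x = base | length;
        return x & ~(x - 1);
}

int main(void)
{
        struct chunk chunks[] = {
                { 0x100000, 0x20000 },  /* 1 MB base, 128 KB long -> 128 KB alignment */
                { 0x240000, 0x10000 },  /* 2.25 MB base, 64 KB long -> 64 KB alignment */
        };
        uint64_t align = 0;

        for (size_t i = 0; i < sizeof(chunks) / sizeof(chunks[0]); i++) {
                uint64_t a = chunk_alignment(chunks[i].base, chunks[i].length);

                /* keep the smallest alignment seen so far */
                align = align ? (a < align ? a : align) : a;
        }

        printf("buffer alignment: 0x%llx\n", (unsigned long long)align);
        return 0;
}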
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 81fa38bb..08fbfb80 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2183,6 +2183,51 @@ err_kfree:
 #endif
 }
 
+static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl,
+                              enum gk20a_aperture aperture)
+{
+        u64 align = 0, chunk_align = 0;
+        u64 buf_addr;
+
+        if (aperture == APERTURE_VIDMEM) {
+                struct gk20a_page_alloc *alloc = (struct gk20a_page_alloc *)
+                        sg_dma_address(sgl);
+                struct page_alloc_chunk *chunk = NULL;
+
+                list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
+                        chunk_align = 1ULL << __ffs(chunk->base | chunk->length);
+
+                        if (align)
+                                align = min(align, chunk_align);
+                        else
+                                align = chunk_align;
+                }
+
+                return align;
+        }
+
+        buf_addr = (u64)sg_dma_address(sgl);
+
+        if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
+                while (sgl) {
+                        buf_addr = (u64)sg_phys(sgl);
+                        chunk_align = 1ULL << __ffs(buf_addr | (u64)sgl->length);
+
+                        if (align)
+                                align = min(align, chunk_align);
+                        else
+                                align = chunk_align;
+                        sgl = sg_next(sgl);
+                }
+
+                return align;
+        }
+
+        align = 1ULL << __ffs(buf_addr);
+
+        return align;
+}
+
 u64 gk20a_vm_map(struct vm_gk20a *vm,
                  struct dma_buf *dmabuf,
                  u64 offset_align,
@@ -2207,7 +2252,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         struct gk20a_comptags comptags;
         bool clear_ctags = false;
         struct scatterlist *sgl;
-        u64 buf_addr;
         u64 ctag_map_win_size = 0;
         u32 ctag_map_win_ctagline = 0;
         struct vm_reserved_va_node *va_node = NULL;
@@ -2257,22 +2301,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         bfr.kind_v = kind;
         bfr.size = dmabuf->size;
         sgl = bfr.sgt->sgl;
-        buf_addr = (u64)sg_dma_address(bfr.sgt->sgl);
-        if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
-                while (sgl) {
-                        u64 align;
 
-                        buf_addr = (u64)sg_phys(sgl);
+        aperture = gk20a_dmabuf_aperture(g, dmabuf);
+        if (aperture == APERTURE_INVALID) {
+                err = -EINVAL;
+                goto clean_up;
+        }
 
-                        align = 1ULL << __ffs(buf_addr | (u64)sgl->length);
-                        if (bfr.align)
-                                bfr.align = min_t(u64, align, bfr.align);
-                        else
-                                bfr.align = align;
-                        sgl = sg_next(sgl);
-                }
-        } else
-                bfr.align = 1ULL << __ffs(buf_addr);
+        bfr.align = gk20a_mm_get_align(g, sgl, aperture);
 
         bfr.pgsz_idx = -1;
         mapping_size = mapping_size ? mapping_size : bfr.size;
@@ -2388,12 +2424,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                 ctag_offset += buffer_offset >>
                                ilog2(g->ops.fb.compression_page_size(g));
 
-        aperture = gk20a_dmabuf_aperture(g, dmabuf);
-        if (aperture == APERTURE_INVALID) {
-                err = -EINVAL;
-                goto clean_up;
-        }
-
         /* update gmmu ptes */
         map_offset = g->ops.mm.gmmu_map(vm, map_offset,
                                         bfr.sgt,