diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2016-08-25 05:04:55 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-01 12:12:15 -0400 |
commit | f43231f7a5b60cd7ebd3068dde04eaac43361d02 (patch) | |
tree | b55007ae1352e13e109c9f55a686bf5fe8ba3cbf | |
parent | 737d6346307ad7e87c0195f6e2e5d52cf4c3dd3b (diff) |
gpu: nvgpu: enable big page support for pci
While mapping the buffer, first check whether the buffer is in
vidmem; if so, convert the allocation into a base address
and then walk through each chunk to decide the alignment.
Add a new API, gk20a_mm_get_align(), which returns the
alignment based on the scatterlist and aperture, and use
this API to get the alignment during mapping.
Enable big page support for PCI by unsetting disable_bigpage.
Jira DNVGPU-97
Change-Id: I358dc98fac8103fdf9d2bde758e61b363fea9ae9
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1207673
(cherry picked from commit d14d42290eed4aa7a2dd2be25e8e996917a58e82)
Reviewed-on: http://git-master/r/1210959
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 72 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/pci.c | 1 |
2 files changed, 51 insertions, 22 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 81fa38bb..08fbfb80 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -2183,6 +2183,51 @@ err_kfree: | |||
2183 | #endif | 2183 | #endif |
2184 | } | 2184 | } |
2185 | 2185 | ||
2186 | static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, | ||
2187 | enum gk20a_aperture aperture) | ||
2188 | { | ||
2189 | u64 align = 0, chunk_align = 0; | ||
2190 | u64 buf_addr; | ||
2191 | |||
2192 | if (aperture == APERTURE_VIDMEM) { | ||
2193 | struct gk20a_page_alloc *alloc = (struct gk20a_page_alloc *) | ||
2194 | sg_dma_address(sgl); | ||
2195 | struct page_alloc_chunk *chunk = NULL; | ||
2196 | |||
2197 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | ||
2198 | chunk_align = 1ULL << __ffs(chunk->base | chunk->length); | ||
2199 | |||
2200 | if (align) | ||
2201 | align = min(align, chunk_align); | ||
2202 | else | ||
2203 | align = chunk_align; | ||
2204 | } | ||
2205 | |||
2206 | return align; | ||
2207 | } | ||
2208 | |||
2209 | buf_addr = (u64)sg_dma_address(sgl); | ||
2210 | |||
2211 | if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) { | ||
2212 | while (sgl) { | ||
2213 | buf_addr = (u64)sg_phys(sgl); | ||
2214 | chunk_align = 1ULL << __ffs(buf_addr | (u64)sgl->length); | ||
2215 | |||
2216 | if (align) | ||
2217 | align = min(align, chunk_align); | ||
2218 | else | ||
2219 | align = chunk_align; | ||
2220 | sgl = sg_next(sgl); | ||
2221 | } | ||
2222 | |||
2223 | return align; | ||
2224 | } | ||
2225 | |||
2226 | align = 1ULL << __ffs(buf_addr); | ||
2227 | |||
2228 | return align; | ||
2229 | } | ||
2230 | |||
2186 | u64 gk20a_vm_map(struct vm_gk20a *vm, | 2231 | u64 gk20a_vm_map(struct vm_gk20a *vm, |
2187 | struct dma_buf *dmabuf, | 2232 | struct dma_buf *dmabuf, |
2188 | u64 offset_align, | 2233 | u64 offset_align, |
@@ -2207,7 +2252,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
2207 | struct gk20a_comptags comptags; | 2252 | struct gk20a_comptags comptags; |
2208 | bool clear_ctags = false; | 2253 | bool clear_ctags = false; |
2209 | struct scatterlist *sgl; | 2254 | struct scatterlist *sgl; |
2210 | u64 buf_addr; | ||
2211 | u64 ctag_map_win_size = 0; | 2255 | u64 ctag_map_win_size = 0; |
2212 | u32 ctag_map_win_ctagline = 0; | 2256 | u32 ctag_map_win_ctagline = 0; |
2213 | struct vm_reserved_va_node *va_node = NULL; | 2257 | struct vm_reserved_va_node *va_node = NULL; |
@@ -2257,22 +2301,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
2257 | bfr.kind_v = kind; | 2301 | bfr.kind_v = kind; |
2258 | bfr.size = dmabuf->size; | 2302 | bfr.size = dmabuf->size; |
2259 | sgl = bfr.sgt->sgl; | 2303 | sgl = bfr.sgt->sgl; |
2260 | buf_addr = (u64)sg_dma_address(bfr.sgt->sgl); | ||
2261 | if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) { | ||
2262 | while (sgl) { | ||
2263 | u64 align; | ||
2264 | 2304 | ||
2265 | buf_addr = (u64)sg_phys(sgl); | 2305 | aperture = gk20a_dmabuf_aperture(g, dmabuf); |
2306 | if (aperture == APERTURE_INVALID) { | ||
2307 | err = -EINVAL; | ||
2308 | goto clean_up; | ||
2309 | } | ||
2266 | 2310 | ||
2267 | align = 1ULL << __ffs(buf_addr | (u64)sgl->length); | 2311 | bfr.align = gk20a_mm_get_align(g, sgl, aperture); |
2268 | if (bfr.align) | ||
2269 | bfr.align = min_t(u64, align, bfr.align); | ||
2270 | else | ||
2271 | bfr.align = align; | ||
2272 | sgl = sg_next(sgl); | ||
2273 | } | ||
2274 | } else | ||
2275 | bfr.align = 1ULL << __ffs(buf_addr); | ||
2276 | 2312 | ||
2277 | bfr.pgsz_idx = -1; | 2313 | bfr.pgsz_idx = -1; |
2278 | mapping_size = mapping_size ? mapping_size : bfr.size; | 2314 | mapping_size = mapping_size ? mapping_size : bfr.size; |
@@ -2388,12 +2424,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
2388 | ctag_offset += buffer_offset >> | 2424 | ctag_offset += buffer_offset >> |
2389 | ilog2(g->ops.fb.compression_page_size(g)); | 2425 | ilog2(g->ops.fb.compression_page_size(g)); |
2390 | 2426 | ||
2391 | aperture = gk20a_dmabuf_aperture(g, dmabuf); | ||
2392 | if (aperture == APERTURE_INVALID) { | ||
2393 | err = -EINVAL; | ||
2394 | goto clean_up; | ||
2395 | } | ||
2396 | |||
2397 | /* update gmmu ptes */ | 2427 | /* update gmmu ptes */ |
2398 | map_offset = g->ops.mm.gmmu_map(vm, map_offset, | 2428 | map_offset = g->ops.mm.gmmu_map(vm, map_offset, |
2399 | bfr.sgt, | 2429 | bfr.sgt, |
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index 5d058ed9..cbf444ad 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c | |||
@@ -56,7 +56,6 @@ static struct gk20a_platform nvgpu_pci_device = { | |||
56 | .default_big_page_size = SZ_64K, | 56 | .default_big_page_size = SZ_64K, |
57 | 57 | ||
58 | .ch_wdt_timeout_ms = 7000, | 58 | .ch_wdt_timeout_ms = 7000, |
59 | .disable_bigpage = true, | ||
60 | 59 | ||
61 | .has_ce = true, | 60 | .has_ce = true, |
62 | }; | 61 | }; |