diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 170 |
1 file changed, 113 insertions, 57 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 76c33512..e79cc1d1 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -111,7 +111,7 @@ static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm); | |||
111 | struct gk20a_dmabuf_priv { | 111 | struct gk20a_dmabuf_priv { |
112 | struct mutex lock; | 112 | struct mutex lock; |
113 | 113 | ||
114 | struct gk20a_allocator *comptag_allocator; | 114 | struct gk20a_comptag_allocator *comptag_allocator; |
115 | struct gk20a_comptags comptags; | 115 | struct gk20a_comptags comptags; |
116 | 116 | ||
117 | struct dma_buf_attachment *attach; | 117 | struct dma_buf_attachment *attach; |
@@ -126,6 +126,41 @@ struct gk20a_dmabuf_priv { | |||
126 | 126 | ||
127 | static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); | 127 | static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); |
128 | 128 | ||
129 | static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, | ||
130 | u32 *offset, u32 len) | ||
131 | { | ||
132 | unsigned long addr; | ||
133 | int err = 0; | ||
134 | |||
135 | mutex_lock(&allocator->lock); | ||
136 | addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size, | ||
137 | 0, len, 0); | ||
138 | if (addr < allocator->size) { | ||
139 | /* number zero is reserved; bitmap base is 1 */ | ||
140 | *offset = 1 + addr; | ||
141 | bitmap_set(allocator->bitmap, addr, len); | ||
142 | } else { | ||
143 | err = -ENOMEM; | ||
144 | } | ||
145 | mutex_unlock(&allocator->lock); | ||
146 | |||
147 | return err; | ||
148 | } | ||
149 | |||
150 | static void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator, | ||
151 | u32 offset, u32 len) | ||
152 | { | ||
153 | /* number zero is reserved; bitmap base is 1 */ | ||
154 | u32 addr = offset - 1; | ||
155 | WARN_ON(offset == 0); | ||
156 | WARN_ON(addr > allocator->size); | ||
157 | WARN_ON(addr + len > allocator->size); | ||
158 | |||
159 | mutex_lock(&allocator->lock); | ||
160 | bitmap_clear(allocator->bitmap, addr, len); | ||
161 | mutex_unlock(&allocator->lock); | ||
162 | } | ||
163 | |||
129 | static void gk20a_mm_delete_priv(void *_priv) | 164 | static void gk20a_mm_delete_priv(void *_priv) |
130 | { | 165 | { |
131 | struct gk20a_buffer_state *s, *s_tmp; | 166 | struct gk20a_buffer_state *s, *s_tmp; |
@@ -135,8 +170,9 @@ static void gk20a_mm_delete_priv(void *_priv) | |||
135 | 170 | ||
136 | if (priv->comptags.lines) { | 171 | if (priv->comptags.lines) { |
137 | BUG_ON(!priv->comptag_allocator); | 172 | BUG_ON(!priv->comptag_allocator); |
138 | gk20a_bfree(priv->comptag_allocator, | 173 | gk20a_comptaglines_free(priv->comptag_allocator, |
139 | priv->comptags.real_offset); | 174 | priv->comptags.offset, |
175 | priv->comptags.allocated_lines); | ||
140 | } | 176 | } |
141 | 177 | ||
142 | /* Free buffer states */ | 178 | /* Free buffer states */ |
@@ -221,19 +257,21 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, | |||
221 | static int gk20a_alloc_comptags(struct gk20a *g, | 257 | static int gk20a_alloc_comptags(struct gk20a *g, |
222 | struct device *dev, | 258 | struct device *dev, |
223 | struct dma_buf *dmabuf, | 259 | struct dma_buf *dmabuf, |
224 | struct gk20a_allocator *allocator, | 260 | struct gk20a_comptag_allocator *allocator, |
225 | u32 lines, bool user_mappable, | 261 | u32 lines, bool user_mappable, |
226 | u64 *ctag_map_win_size, | 262 | u64 *ctag_map_win_size, |
227 | u32 *ctag_map_win_ctagline) | 263 | u32 *ctag_map_win_ctagline) |
228 | { | 264 | { |
229 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | 265 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); |
230 | u32 ctaglines_to_allocate; | 266 | u32 ctaglines_allocsize; |
231 | u32 ctagline_align = 1; | 267 | u32 ctagline_align; |
232 | u32 offset; | 268 | u32 offset; |
269 | u32 alignment_lines; | ||
233 | const u32 aggregate_cacheline_sz = | 270 | const u32 aggregate_cacheline_sz = |
234 | g->gr.cacheline_size * g->gr.slices_per_ltc * | 271 | g->gr.cacheline_size * g->gr.slices_per_ltc * |
235 | g->ltc_count; | 272 | g->ltc_count; |
236 | const u32 small_pgsz = 4096; | 273 | const u32 small_pgsz = 4096; |
274 | int err; | ||
237 | 275 | ||
238 | if (!priv) | 276 | if (!priv) |
239 | return -ENOSYS; | 277 | return -ENOSYS; |
@@ -242,17 +280,19 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
242 | return -EINVAL; | 280 | return -EINVAL; |
243 | 281 | ||
244 | if (!user_mappable) { | 282 | if (!user_mappable) { |
245 | ctaglines_to_allocate = lines; | 283 | ctaglines_allocsize = lines; |
284 | ctagline_align = 1; | ||
246 | } else { | 285 | } else { |
247 | /* Unfortunately, we cannot use allocation alignment | 286 | /* |
248 | * here, since compbits per cacheline is not always a | 287 | * For security, align the allocation on a page, and reserve |
249 | * power of two. So, we just have to allocate enough | 288 | * whole pages. Unfortunately, we cannot ask the allocator to |
250 | * extra that we're guaranteed to find a ctagline | 289 | * align here, since compbits per cacheline is not always a |
251 | * inside the allocation so that: 1) it is the first | 290 | * power of two. So, we just have to allocate enough extra that |
252 | * ctagline in a cacheline that starts at a page | 291 | * we're guaranteed to find a ctagline inside the allocation so |
253 | * boundary, and 2) we can add enough overallocation | 292 | * that: 1) it is the first ctagline in a cacheline that starts |
254 | * that the ctaglines of the succeeding allocation | 293 | * at a page boundary, and 2) we can add enough overallocation |
255 | * are on different page than ours | 294 | * that the ctaglines of the succeeding allocation are on |
295 | * different page than ours. | ||
256 | */ | 296 | */ |
257 | 297 | ||
258 | ctagline_align = | 298 | ctagline_align = |
@@ -260,7 +300,7 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
260 | aggregate_cacheline_sz) * | 300 | aggregate_cacheline_sz) * |
261 | g->gr.comptags_per_cacheline; | 301 | g->gr.comptags_per_cacheline; |
262 | 302 | ||
263 | ctaglines_to_allocate = | 303 | ctaglines_allocsize = |
264 | /* for alignment */ | 304 | /* for alignment */ |
265 | ctagline_align + | 305 | ctagline_align + |
266 | 306 | ||
@@ -272,37 +312,71 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
272 | DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) * | 312 | DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) * |
273 | g->gr.comptags_per_cacheline; | 313 | g->gr.comptags_per_cacheline; |
274 | 314 | ||
275 | if (ctaglines_to_allocate < lines) | 315 | if (ctaglines_allocsize < lines) |
276 | return -EINVAL; /* integer overflow */ | 316 | return -EINVAL; /* integer overflow */ |
277 | } | 317 | } |
278 | 318 | ||
279 | /* store the allocator so we can use it when we free the ctags */ | 319 | /* store the allocator so we can use it when we free the ctags */ |
280 | priv->comptag_allocator = allocator; | 320 | priv->comptag_allocator = allocator; |
281 | offset = gk20a_balloc(allocator, ctaglines_to_allocate); | 321 | err = gk20a_comptaglines_alloc(allocator, &offset, |
282 | if (!offset) | 322 | ctaglines_allocsize); |
283 | return -ENOMEM; | 323 | if (err) |
324 | return err; | ||
284 | 325 | ||
285 | priv->comptags.lines = lines; | 326 | /* |
286 | priv->comptags.real_offset = offset; | 327 | * offset needs to be at the start of a page/cacheline boundary; |
287 | priv->comptags.allocated_lines = ctaglines_to_allocate; | 328 | * prune the preceding ctaglines that were allocated for alignment. |
329 | */ | ||
330 | alignment_lines = | ||
331 | DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - offset; | ||
332 | if (alignment_lines) { | ||
333 | gk20a_comptaglines_free(allocator, offset, alignment_lines); | ||
334 | offset += alignment_lines; | ||
335 | ctaglines_allocsize -= alignment_lines; | ||
336 | } | ||
288 | 337 | ||
338 | /* | ||
339 | * check if we can prune the trailing, too; we just need to reserve | ||
340 | * whole pages and ctagcachelines. | ||
341 | */ | ||
289 | if (user_mappable) { | 342 | if (user_mappable) { |
290 | u64 win_size = | 343 | u32 needed_cachelines = |
344 | DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline); | ||
345 | u32 needed_bytes = round_up(needed_cachelines * | ||
346 | aggregate_cacheline_sz, | ||
347 | small_pgsz); | ||
348 | u32 first_unneeded_cacheline = | ||
349 | DIV_ROUND_UP(needed_bytes, aggregate_cacheline_sz); | ||
350 | u32 needed_ctaglines = first_unneeded_cacheline * | ||
351 | g->gr.comptags_per_cacheline; | ||
352 | u64 win_size; | ||
353 | |||
354 | if (needed_ctaglines < ctaglines_allocsize) { | ||
355 | gk20a_comptaglines_free(allocator, | ||
356 | offset + needed_ctaglines, | ||
357 | ctaglines_allocsize - needed_ctaglines); | ||
358 | ctaglines_allocsize = needed_ctaglines; | ||
359 | } | ||
360 | |||
361 | *ctag_map_win_ctagline = offset; | ||
362 | win_size = | ||
291 | DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) * | 363 | DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) * |
292 | aggregate_cacheline_sz; | 364 | aggregate_cacheline_sz; |
293 | win_size = roundup(win_size, small_pgsz); | ||
294 | 365 | ||
295 | offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align; | 366 | *ctag_map_win_size = round_up(win_size, small_pgsz); |
296 | *ctag_map_win_ctagline = offset; | ||
297 | *ctag_map_win_size = win_size; | ||
298 | } | 367 | } |
299 | 368 | ||
300 | |||
301 | priv->comptags.offset = offset; | 369 | priv->comptags.offset = offset; |
370 | priv->comptags.lines = lines; | ||
371 | priv->comptags.allocated_lines = ctaglines_allocsize; | ||
372 | priv->comptags.user_mappable = user_mappable; | ||
302 | 373 | ||
303 | return 0; | 374 | return 0; |
304 | } | 375 | } |
305 | 376 | ||
377 | |||
378 | |||
379 | |||
306 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) | 380 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) |
307 | { | 381 | { |
308 | gk20a_dbg_fn(""); | 382 | gk20a_dbg_fn(""); |
@@ -1412,7 +1486,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1412 | struct vm_gk20a_mapping_batch *batch) | 1486 | struct vm_gk20a_mapping_batch *batch) |
1413 | { | 1487 | { |
1414 | struct gk20a *g = gk20a_from_vm(vm); | 1488 | struct gk20a *g = gk20a_from_vm(vm); |
1415 | struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags; | 1489 | struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags; |
1416 | struct device *d = dev_from_vm(vm); | 1490 | struct device *d = dev_from_vm(vm); |
1417 | struct mapped_buffer_node *mapped_buffer = NULL; | 1491 | struct mapped_buffer_node *mapped_buffer = NULL; |
1418 | bool inserted = false, va_allocated = false; | 1492 | bool inserted = false, va_allocated = false; |
@@ -1579,32 +1653,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1579 | gk20a_get_comptags(d, dmabuf, &comptags); | 1653 | gk20a_get_comptags(d, dmabuf, &comptags); |
1580 | clear_ctags = true; | 1654 | clear_ctags = true; |
1581 | 1655 | ||
1582 | comptags.user_mappable = user_mappable; | 1656 | if (comptags.lines < comptags.allocated_lines) { |
1583 | 1657 | /* clear tail-padding comptags */ | |
1584 | if (user_mappable) { | 1658 | u32 ctagmin = comptags.offset + comptags.lines; |
1585 | /* comptags for the buffer will be | 1659 | u32 ctagmax = comptags.offset + |
1586 | cleared later, but we need to make | 1660 | comptags.allocated_lines - 1; |
1587 | sure the whole comptags allocation | 1661 | |
1588 | (which may be bigger) is cleared in | 1662 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, |
1589 | order not to leak compbits */ | 1663 | ctagmin, ctagmax); |
1590 | |||
1591 | const u32 buffer_ctag_end = | ||
1592 | comptags.offset + comptags.lines; | ||
1593 | const u32 alloc_ctag_end = | ||
1594 | comptags.real_offset + | ||
1595 | comptags.allocated_lines; | ||
1596 | |||
1597 | if (comptags.real_offset < comptags.offset) | ||
1598 | g->ops.ltc.cbc_ctrl( | ||
1599 | g, gk20a_cbc_op_clear, | ||
1600 | comptags.real_offset, | ||
1601 | comptags.offset - 1); | ||
1602 | |||
1603 | if (buffer_ctag_end < alloc_ctag_end) | ||
1604 | g->ops.ltc.cbc_ctrl( | ||
1605 | g, gk20a_cbc_op_clear, | ||
1606 | buffer_ctag_end, | ||
1607 | alloc_ctag_end - 1); | ||
1608 | } | 1664 | } |
1609 | } | 1665 | } |
1610 | } | 1666 | } |