path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  170
1 file changed, 113 insertions(+), 57 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 76c33512..e79cc1d1 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -111,7 +111,7 @@ static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
 struct gk20a_dmabuf_priv {
 	struct mutex lock;
 
-	struct gk20a_allocator *comptag_allocator;
+	struct gk20a_comptag_allocator *comptag_allocator;
 	struct gk20a_comptags comptags;
 
 	struct dma_buf_attachment *attach;
@@ -126,6 +126,41 @@ struct gk20a_dmabuf_priv {
 
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
 
+static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
+		u32 *offset, u32 len)
+{
+	unsigned long addr;
+	int err = 0;
+
+	mutex_lock(&allocator->lock);
+	addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size,
+			0, len, 0);
+	if (addr < allocator->size) {
+		/* number zero is reserved; bitmap base is 1 */
+		*offset = 1 + addr;
+		bitmap_set(allocator->bitmap, addr, len);
+	} else {
+		err = -ENOMEM;
+	}
+	mutex_unlock(&allocator->lock);
+
+	return err;
+}
+
+static void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
+		u32 offset, u32 len)
+{
+	/* number zero is reserved; bitmap base is 1 */
+	u32 addr = offset - 1;
+	WARN_ON(offset == 0);
+	WARN_ON(addr > allocator->size);
+	WARN_ON(addr + len > allocator->size);
+
+	mutex_lock(&allocator->lock);
+	bitmap_clear(allocator->bitmap, addr, len);
+	mutex_unlock(&allocator->lock);
+}
+
 static void gk20a_mm_delete_priv(void *_priv)
 {
 	struct gk20a_buffer_state *s, *s_tmp;
@@ -135,8 +170,9 @@ static void gk20a_mm_delete_priv(void *_priv)
 
 	if (priv->comptags.lines) {
 		BUG_ON(!priv->comptag_allocator);
-		gk20a_bfree(priv->comptag_allocator,
-			    priv->comptags.real_offset);
+		gk20a_comptaglines_free(priv->comptag_allocator,
+				priv->comptags.offset,
+				priv->comptags.allocated_lines);
 	}
 
 	/* Free buffer states */
@@ -221,19 +257,21 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
 static int gk20a_alloc_comptags(struct gk20a *g,
 				struct device *dev,
 				struct dma_buf *dmabuf,
-				struct gk20a_allocator *allocator,
+				struct gk20a_comptag_allocator *allocator,
 				u32 lines, bool user_mappable,
 				u64 *ctag_map_win_size,
 				u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
-	u32 ctaglines_to_allocate;
-	u32 ctagline_align = 1;
+	u32 ctaglines_allocsize;
+	u32 ctagline_align;
 	u32 offset;
+	u32 alignment_lines;
 	const u32 aggregate_cacheline_sz =
 		g->gr.cacheline_size * g->gr.slices_per_ltc *
 		g->ltc_count;
 	const u32 small_pgsz = 4096;
+	int err;
 
 	if (!priv)
 		return -ENOSYS;
@@ -242,17 +280,19 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 		return -EINVAL;
 
 	if (!user_mappable) {
-		ctaglines_to_allocate = lines;
+		ctaglines_allocsize = lines;
+		ctagline_align = 1;
 	} else {
-		/* Unfortunately, we cannot use allocation alignment
-		 * here, since compbits per cacheline is not always a
-		 * power of two. So, we just have to allocate enough
-		 * extra that we're guaranteed to find a ctagline
-		 * inside the allocation so that: 1) it is the first
-		 * ctagline in a cacheline that starts at a page
-		 * boundary, and 2) we can add enough overallocation
-		 * that the ctaglines of the succeeding allocation
-		 * are on different page than ours
+		/*
+		 * For security, align the allocation on a page, and reserve
+		 * whole pages. Unfortunately, we cannot ask the allocator to
+		 * align here, since compbits per cacheline is not always a
+		 * power of two. So, we just have to allocate enough extra that
+		 * we're guaranteed to find a ctagline inside the allocation so
+		 * that: 1) it is the first ctagline in a cacheline that starts
+		 * at a page boundary, and 2) we can add enough overallocation
+		 * that the ctaglines of the succeeding allocation are on
+		 * different page than ours.
 		 */
 
 		ctagline_align =
@@ -260,7 +300,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			aggregate_cacheline_sz) *
 			g->gr.comptags_per_cacheline;
 
-		ctaglines_to_allocate =
+		ctaglines_allocsize =
 			/* for alignment */
 			ctagline_align +
 
@@ -272,37 +312,71 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
 			g->gr.comptags_per_cacheline;
 
-		if (ctaglines_to_allocate < lines)
+		if (ctaglines_allocsize < lines)
 			return -EINVAL; /* integer overflow */
 	}
 
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
-	offset = gk20a_balloc(allocator, ctaglines_to_allocate);
-	if (!offset)
-		return -ENOMEM;
+	err = gk20a_comptaglines_alloc(allocator, &offset,
+			ctaglines_allocsize);
+	if (err)
+		return err;
 
-	priv->comptags.lines = lines;
-	priv->comptags.real_offset = offset;
-	priv->comptags.allocated_lines = ctaglines_to_allocate;
+	/*
+	 * offset needs to be at the start of a page/cacheline boundary;
+	 * prune the preceding ctaglines that were allocated for alignment.
+	 */
+	alignment_lines =
+		DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - offset;
+	if (alignment_lines) {
+		gk20a_comptaglines_free(allocator, offset, alignment_lines);
+		offset += alignment_lines;
+		ctaglines_allocsize -= alignment_lines;
+	}
 
+	/*
+	 * check if we can prune the trailing, too; we just need to reserve
+	 * whole pages and ctagcachelines.
+	 */
 	if (user_mappable) {
-		u64 win_size =
+		u32 needed_cachelines =
+			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
+		u32 needed_bytes = round_up(needed_cachelines *
+					    aggregate_cacheline_sz,
+					    small_pgsz);
+		u32 first_unneeded_cacheline =
+			DIV_ROUND_UP(needed_bytes, aggregate_cacheline_sz);
+		u32 needed_ctaglines = first_unneeded_cacheline *
+			g->gr.comptags_per_cacheline;
+		u64 win_size;
+
+		if (needed_ctaglines < ctaglines_allocsize) {
+			gk20a_comptaglines_free(allocator,
+					offset + needed_ctaglines,
+					ctaglines_allocsize - needed_ctaglines);
+			ctaglines_allocsize = needed_ctaglines;
+		}
+
+		*ctag_map_win_ctagline = offset;
+		win_size =
 			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
 			aggregate_cacheline_sz;
-		win_size = roundup(win_size, small_pgsz);
 
-		offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
-		*ctag_map_win_ctagline = offset;
-		*ctag_map_win_size = win_size;
+		*ctag_map_win_size = round_up(win_size, small_pgsz);
 	}
 
-
 	priv->comptags.offset = offset;
+	priv->comptags.lines = lines;
+	priv->comptags.allocated_lines = ctaglines_allocsize;
+	priv->comptags.user_mappable = user_mappable;
 
 	return 0;
 }
 
+
+
+
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
@@ -1412,7 +1486,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
+	struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
 	struct device *d = dev_from_vm(vm);
 	struct mapped_buffer_node *mapped_buffer = NULL;
 	bool inserted = false, va_allocated = false;
@@ -1579,32 +1653,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			gk20a_get_comptags(d, dmabuf, &comptags);
 			clear_ctags = true;
 
-			comptags.user_mappable = user_mappable;
-
-			if (user_mappable) {
-				/* comptags for the buffer will be
-				   cleared later, but we need to make
-				   sure the whole comptags allocation
-				   (which may be bigger) is cleared in
-				   order not to leak compbits */
-
-				const u32 buffer_ctag_end =
-					comptags.offset + comptags.lines;
-				const u32 alloc_ctag_end =
-					comptags.real_offset +
-					comptags.allocated_lines;
-
-				if (comptags.real_offset < comptags.offset)
-					g->ops.ltc.cbc_ctrl(
-						g, gk20a_cbc_op_clear,
-						comptags.real_offset,
-						comptags.offset - 1);
-
-				if (buffer_ctag_end < alloc_ctag_end)
-					g->ops.ltc.cbc_ctrl(
-						g, gk20a_cbc_op_clear,
-						buffer_ctag_end,
-						alloc_ctag_end - 1);
+			if (comptags.lines < comptags.allocated_lines) {
+				/* clear tail-padding comptags */
+				u32 ctagmin = comptags.offset + comptags.lines;
+				u32 ctagmax = comptags.offset +
+					comptags.allocated_lines - 1;
+
+				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+						    ctagmin, ctagmax);
 			}
 		}
 	}
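
Note: the new gk20a_comptaglines_alloc()/gk20a_comptaglines_free() helpers take a
struct gk20a_comptag_allocator, whose definition is not part of this diff. The
following is only a sketch of what that structure presumably contains, inferred
from the fields the helpers touch (lock, bitmap, size) and from the "bitmap base
is 1" comment above; the actual definition lives elsewhere in the tree.

/*
 * Sketch only (not from this diff): bitmap-based comptag line allocator
 * as used by gk20a_comptaglines_alloc()/gk20a_comptaglines_free().
 */
struct gk20a_comptag_allocator {
	struct mutex lock;

	/* bitmap of comptag lines; ctag 0 is reserved, so bit 0 maps to ctag 1 */
	unsigned long *bitmap;

	/* number of bits in the bitmap */
	unsigned long size;
};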