path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author	Konsta Holtta <kholtta@nvidia.com>	2015-12-01 04:55:27 -0500
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-01-19 20:44:27 -0500
commit	db7095ce5180552d1a70fdea779e5987d55cce7b (patch)
tree	35b9125f86efbfca657910f33a49b1c08ddc6acc /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent	7095a72e563b5d7969c5f09053e469906362428f (diff)
gpu: nvgpu: bitmap allocator for comptags
Restore comptags to be bitmap-allocated, like they were before we had the buddy allocator.

The new buddy allocator introduced by e99aa2485f8992eabe3556f3ebcb57bdc8ad91ff (originally 6ab2e0c49cb79ca68d2f83f1d4610783d2eaa79b) is fine for the big VAs, but unsuitable for the small compbit store.

This commit partially reverts the combination of the above commit and a later one, 86fc7ec9a05999bea8de320840b962db3ee11410, which fixed a bug that is not present when using a bitmap. With a bitmap allocator, pruning the extra allocation necessary for user-mapped mode is possible, so that is also restored. The original generic bitmap allocator is not restored; instead, a comptag-only allocator is introduced.

Bug 200145635

Change-Id: I87f3a911826a801124cfd21e44857dfab1c3f378
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/837180
(cherry picked from commit 5a504aeb54f3e89e6561932971158a397157b3f2)
Reviewed-on: http://git-master/r/839742
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	170
1 file changed, 113 insertions(+), 57 deletions(-)
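The new helpers below operate on a dedicated struct gk20a_comptag_allocator rather than the generic struct gk20a_allocator. Its definition is not part of this file's diff; the following is a minimal sketch of what it might look like, assuming only the three fields the new helpers actually touch (field names follow the usage in gk20a_comptaglines_alloc()/_free(); the exact types are assumptions, not taken from the patch):

#include <linux/mutex.h>
#include <linux/types.h>

/*
 * Hypothetical sketch only -- the real definition lives outside this diff.
 * Field names follow the usage in the new helpers; types are assumptions.
 */
struct gk20a_comptag_allocator {
	struct mutex lock;	/* serializes bitmap_set()/bitmap_clear() */
	unsigned long *bitmap;	/* one bit per ctagline; bit 0 maps to ctagline 1 */
	unsigned long size;	/* number of ctaglines covered by the bitmap */
};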
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 76c33512..e79cc1d1 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -111,7 +111,7 @@ static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
 struct gk20a_dmabuf_priv {
 	struct mutex lock;
 
-	struct gk20a_allocator *comptag_allocator;
+	struct gk20a_comptag_allocator *comptag_allocator;
 	struct gk20a_comptags comptags;
 
 	struct dma_buf_attachment *attach;
@@ -126,6 +126,41 @@ struct gk20a_dmabuf_priv {
 
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
 
+static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
+		u32 *offset, u32 len)
+{
+	unsigned long addr;
+	int err = 0;
+
+	mutex_lock(&allocator->lock);
+	addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size,
+			0, len, 0);
+	if (addr < allocator->size) {
+		/* number zero is reserved; bitmap base is 1 */
+		*offset = 1 + addr;
+		bitmap_set(allocator->bitmap, addr, len);
+	} else {
+		err = -ENOMEM;
+	}
+	mutex_unlock(&allocator->lock);
+
+	return err;
+}
+
+static void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
+		u32 offset, u32 len)
+{
+	/* number zero is reserved; bitmap base is 1 */
+	u32 addr = offset - 1;
+	WARN_ON(offset == 0);
+	WARN_ON(addr > allocator->size);
+	WARN_ON(addr + len > allocator->size);
+
+	mutex_lock(&allocator->lock);
+	bitmap_clear(allocator->bitmap, addr, len);
+	mutex_unlock(&allocator->lock);
+}
+
 static void gk20a_mm_delete_priv(void *_priv)
 {
 	struct gk20a_buffer_state *s, *s_tmp;
@@ -135,8 +170,9 @@ static void gk20a_mm_delete_priv(void *_priv)
 
 	if (priv->comptags.lines) {
 		BUG_ON(!priv->comptag_allocator);
-		gk20a_bfree(priv->comptag_allocator,
-			    priv->comptags.real_offset);
+		gk20a_comptaglines_free(priv->comptag_allocator,
+				priv->comptags.offset,
+				priv->comptags.allocated_lines);
 	}
 
 	/* Free buffer states */
@@ -221,19 +257,21 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
 static int gk20a_alloc_comptags(struct gk20a *g,
 				struct device *dev,
 				struct dma_buf *dmabuf,
-				struct gk20a_allocator *allocator,
+				struct gk20a_comptag_allocator *allocator,
 				u32 lines, bool user_mappable,
 				u64 *ctag_map_win_size,
 				u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
-	u32 ctaglines_to_allocate;
-	u32 ctagline_align = 1;
+	u32 ctaglines_allocsize;
+	u32 ctagline_align;
 	u32 offset;
+	u32 alignment_lines;
 	const u32 aggregate_cacheline_sz =
 		g->gr.cacheline_size * g->gr.slices_per_ltc *
 		g->ltc_count;
 	const u32 small_pgsz = 4096;
+	int err;
 
 	if (!priv)
 		return -ENOSYS;
@@ -242,17 +280,19 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 		return -EINVAL;
 
 	if (!user_mappable) {
-		ctaglines_to_allocate = lines;
+		ctaglines_allocsize = lines;
+		ctagline_align = 1;
 	} else {
-		/* Unfortunately, we cannot use allocation alignment
-		 * here, since compbits per cacheline is not always a
-		 * power of two. So, we just have to allocate enough
-		 * extra that we're guaranteed to find a ctagline
-		 * inside the allocation so that: 1) it is the first
-		 * ctagline in a cacheline that starts at a page
-		 * boundary, and 2) we can add enough overallocation
-		 * that the ctaglines of the succeeding allocation
-		 * are on different page than ours
-		 */
+		/*
+		 * For security, align the allocation on a page, and reserve
+		 * whole pages. Unfortunately, we cannot ask the allocator to
+		 * align here, since compbits per cacheline is not always a
+		 * power of two. So, we just have to allocate enough extra that
+		 * we're guaranteed to find a ctagline inside the allocation so
+		 * that: 1) it is the first ctagline in a cacheline that starts
+		 * at a page boundary, and 2) we can add enough overallocation
+		 * that the ctaglines of the succeeding allocation are on
+		 * different page than ours.
+		 */
 
 		ctagline_align =
@@ -260,7 +300,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				     aggregate_cacheline_sz) *
 			g->gr.comptags_per_cacheline;
 
-		ctaglines_to_allocate =
+		ctaglines_allocsize =
 			/* for alignment */
 			ctagline_align +
 
@@ -272,37 +312,71 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
 			g->gr.comptags_per_cacheline;
 
-		if (ctaglines_to_allocate < lines)
+		if (ctaglines_allocsize < lines)
 			return -EINVAL; /* integer overflow */
 	}
 
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
-	offset = gk20a_balloc(allocator, ctaglines_to_allocate);
-	if (!offset)
-		return -ENOMEM;
+	err = gk20a_comptaglines_alloc(allocator, &offset,
+			ctaglines_allocsize);
+	if (err)
+		return err;
 
-	priv->comptags.lines = lines;
-	priv->comptags.real_offset = offset;
-	priv->comptags.allocated_lines = ctaglines_to_allocate;
+	/*
+	 * offset needs to be at the start of a page/cacheline boundary;
+	 * prune the preceding ctaglines that were allocated for alignment.
+	 */
+	alignment_lines =
+		DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - offset;
+	if (alignment_lines) {
+		gk20a_comptaglines_free(allocator, offset, alignment_lines);
+		offset += alignment_lines;
+		ctaglines_allocsize -= alignment_lines;
+	}
 
+	/*
+	 * check if we can prune the trailing, too; we just need to reserve
+	 * whole pages and ctagcachelines.
+	 */
 	if (user_mappable) {
-		u64 win_size =
+		u32 needed_cachelines =
+			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
+		u32 needed_bytes = round_up(needed_cachelines *
+					    aggregate_cacheline_sz,
+					    small_pgsz);
+		u32 first_unneeded_cacheline =
+			DIV_ROUND_UP(needed_bytes, aggregate_cacheline_sz);
+		u32 needed_ctaglines = first_unneeded_cacheline *
+			g->gr.comptags_per_cacheline;
+		u64 win_size;
+
+		if (needed_ctaglines < ctaglines_allocsize) {
+			gk20a_comptaglines_free(allocator,
+					offset + needed_ctaglines,
+					ctaglines_allocsize - needed_ctaglines);
+			ctaglines_allocsize = needed_ctaglines;
+		}
+
+		*ctag_map_win_ctagline = offset;
+		win_size =
 			DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
 			aggregate_cacheline_sz;
-		win_size = roundup(win_size, small_pgsz);
 
-		offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
-		*ctag_map_win_ctagline = offset;
-		*ctag_map_win_size = win_size;
+		*ctag_map_win_size = round_up(win_size, small_pgsz);
 	}
 
-
 	priv->comptags.offset = offset;
+	priv->comptags.lines = lines;
+	priv->comptags.allocated_lines = ctaglines_allocsize;
+	priv->comptags.user_mappable = user_mappable;
 
 	return 0;
 }
 
+
+
+
 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
@@ -1412,7 +1486,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		   struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
+	struct gk20a_comptag_allocator *ctag_allocator = &g->gr.comp_tags;
 	struct device *d = dev_from_vm(vm);
 	struct mapped_buffer_node *mapped_buffer = NULL;
 	bool inserted = false, va_allocated = false;
@@ -1579,32 +1653,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			gk20a_get_comptags(d, dmabuf, &comptags);
 			clear_ctags = true;
 
-			comptags.user_mappable = user_mappable;
-
-			if (user_mappable) {
-				/* comptags for the buffer will be
-				   cleared later, but we need to make
-				   sure the whole comptags allocation
-				   (which may be bigger) is cleared in
-				   order not to leak compbits */
-
-				const u32 buffer_ctag_end =
-					comptags.offset + comptags.lines;
-				const u32 alloc_ctag_end =
-					comptags.real_offset +
-					comptags.allocated_lines;
-
-				if (comptags.real_offset < comptags.offset)
-					g->ops.ltc.cbc_ctrl(
-						g, gk20a_cbc_op_clear,
-						comptags.real_offset,
-						comptags.offset - 1);
-
-				if (buffer_ctag_end < alloc_ctag_end)
-					g->ops.ltc.cbc_ctrl(
-						g, gk20a_cbc_op_clear,
-						buffer_ctag_end,
-						alloc_ctag_end - 1);
+			if (comptags.lines < comptags.allocated_lines) {
+				/* clear tail-padding comptags */
+				u32 ctagmin = comptags.offset + comptags.lines;
+				u32 ctagmax = comptags.offset +
+					comptags.allocated_lines - 1;
+
+				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+						    ctagmin, ctagmax);
 			}
 		}
 	}
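For reference, the head-pruning step added in gk20a_alloc_comptags() rounds the bitmap-allocated offset up to the next ctagline_align boundary and returns the skipped leading ctaglines to the allocator. The following standalone sketch walks through that arithmetic with made-up example numbers (plain C, no kernel dependencies; the values are hypothetical and only illustrate the computation in the patch):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* Hypothetical example values; the real ones come from the GPU's
	 * cacheline geometry (ctagline_align) and the bitmap allocator. */
	unsigned int ctagline_align = 128;       /* ctaglines per aligned page */
	unsigned int offset = 300;               /* ctagline returned by the allocator */
	unsigned int ctaglines_allocsize = 400;  /* lines reserved, incl. alignment slack */

	/* Same computation as the driver: distance up to the next aligned
	 * ctagline; those leading lines are freed back to the bitmap. */
	unsigned int alignment_lines =
		DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - offset;

	offset += alignment_lines;               /* 384: now page/cacheline aligned */
	ctaglines_allocsize -= alignment_lines;  /* 316 lines remain reserved */

	printf("alignment_lines=%u offset=%u remaining=%u\n",
	       alignment_lines, offset, ctaglines_allocsize);
	return 0;
}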