diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_cs.c')
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_cs.c | 100 |
1 files changed, 77 insertions, 23 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index e64bec488ed8..5cac83278338 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c | |||
@@ -85,12 +85,6 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) | |||
85 | radeon_bo_list_add_object(&p->relocs[i].lobj, | 85 | radeon_bo_list_add_object(&p->relocs[i].lobj, |
86 | &p->validated); | 86 | &p->validated); |
87 | 87 | ||
88 | if (p->relocs[i].robj->tbo.sync_obj && !(r->flags & RADEON_RELOC_DONT_SYNC)) { | ||
89 | struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj; | ||
90 | if (!radeon_fence_signaled(fence)) { | ||
91 | p->sync_to_ring[fence->ring] = true; | ||
92 | } | ||
93 | } | ||
94 | } else | 88 | } else |
95 | p->relocs[i].handle = 0; | 89 | p->relocs[i].handle = 0; |
96 | } | 90 | } |
@@ -109,8 +103,13 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority | |||
109 | p->ring = RADEON_RING_TYPE_GFX_INDEX; | 103 | p->ring = RADEON_RING_TYPE_GFX_INDEX; |
110 | break; | 104 | break; |
111 | case RADEON_CS_RING_COMPUTE: | 105 | case RADEON_CS_RING_COMPUTE: |
112 | /* for now */ | 106 | if (p->rdev->family >= CHIP_TAHITI) { |
113 | p->ring = RADEON_RING_TYPE_GFX_INDEX; | 107 | if (p->priority > 0) |
108 | p->ring = CAYMAN_RING_TYPE_CP1_INDEX; | ||
109 | else | ||
110 | p->ring = CAYMAN_RING_TYPE_CP2_INDEX; | ||
111 | } else | ||
112 | p->ring = RADEON_RING_TYPE_GFX_INDEX; | ||
114 | break; | 113 | break; |
115 | } | 114 | } |
116 | return 0; | 115 | return 0; |
@@ -118,11 +117,24 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority | |||
118 | 117 | ||
119 | static int radeon_cs_sync_rings(struct radeon_cs_parser *p) | 118 | static int radeon_cs_sync_rings(struct radeon_cs_parser *p) |
120 | { | 119 | { |
120 | bool sync_to_ring[RADEON_NUM_RINGS] = { }; | ||
121 | int i, r; | 121 | int i, r; |
122 | 122 | ||
123 | for (i = 0; i < p->nrelocs; i++) { | ||
124 | if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) | ||
125 | continue; | ||
126 | |||
127 | if (!(p->relocs[i].flags & RADEON_RELOC_DONT_SYNC)) { | ||
128 | struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj; | ||
129 | if (!radeon_fence_signaled(fence)) { | ||
130 | sync_to_ring[fence->ring] = true; | ||
131 | } | ||
132 | } | ||
133 | } | ||
134 | |||
123 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { | 135 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
124 | /* no need to sync to our own or unused rings */ | 136 | /* no need to sync to our own or unused rings */ |
125 | if (i == p->ring || !p->sync_to_ring[i] || !p->rdev->ring[i].ready) | 137 | if (i == p->ring || !sync_to_ring[i] || !p->rdev->ring[i].ready) |
126 | continue; | 138 | continue; |
127 | 139 | ||
128 | if (!p->ib->fence->semaphore) { | 140 | if (!p->ib->fence->semaphore) { |
@@ -163,6 +175,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) | |||
163 | p->chunk_ib_idx = -1; | 175 | p->chunk_ib_idx = -1; |
164 | p->chunk_relocs_idx = -1; | 176 | p->chunk_relocs_idx = -1; |
165 | p->chunk_flags_idx = -1; | 177 | p->chunk_flags_idx = -1; |
178 | p->chunk_const_ib_idx = -1; | ||
166 | p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); | 179 | p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); |
167 | if (p->chunks_array == NULL) { | 180 | if (p->chunks_array == NULL) { |
168 | return -ENOMEM; | 181 | return -ENOMEM; |
@@ -201,6 +214,12 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) | |||
201 | if (p->chunks[i].length_dw == 0) | 214 | if (p->chunks[i].length_dw == 0) |
202 | return -EINVAL; | 215 | return -EINVAL; |
203 | } | 216 | } |
217 | if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) { | ||
218 | p->chunk_const_ib_idx = i; | ||
219 | /* zero length CONST IB isn't useful */ | ||
220 | if (p->chunks[i].length_dw == 0) | ||
221 | return -EINVAL; | ||
222 | } | ||
204 | if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) { | 223 | if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) { |
205 | p->chunk_flags_idx = i; | 224 | p->chunk_flags_idx = i; |
206 | /* zero length flags aren't useful */ | 225 | /* zero length flags aren't useful */ |
@@ -236,21 +255,19 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) | |||
236 | if ((p->cs_flags & RADEON_CS_USE_VM) && | 255 | if ((p->cs_flags & RADEON_CS_USE_VM) && |
237 | !p->rdev->vm_manager.enabled) { | 256 | !p->rdev->vm_manager.enabled) { |
238 | DRM_ERROR("VM not active on asic!\n"); | 257 | DRM_ERROR("VM not active on asic!\n"); |
239 | if (p->chunk_relocs_idx != -1) | ||
240 | kfree(p->chunks[p->chunk_relocs_idx].kdata); | ||
241 | if (p->chunk_flags_idx != -1) | ||
242 | kfree(p->chunks[p->chunk_flags_idx].kdata); | ||
243 | return -EINVAL; | 258 | return -EINVAL; |
244 | } | 259 | } |
245 | 260 | ||
246 | if (radeon_cs_get_ring(p, ring, priority)) { | 261 | /* we only support VM on SI+ */ |
247 | if (p->chunk_relocs_idx != -1) | 262 | if ((p->rdev->family >= CHIP_TAHITI) && |
248 | kfree(p->chunks[p->chunk_relocs_idx].kdata); | 263 | ((p->cs_flags & RADEON_CS_USE_VM) == 0)) { |
249 | if (p->chunk_flags_idx != -1) | 264 | DRM_ERROR("VM required on SI+!\n"); |
250 | kfree(p->chunks[p->chunk_flags_idx].kdata); | ||
251 | return -EINVAL; | 265 | return -EINVAL; |
252 | } | 266 | } |
253 | 267 | ||
268 | if (radeon_cs_get_ring(p, ring, priority)) | ||
269 | return -EINVAL; | ||
270 | |||
254 | 271 | ||
255 | /* deal with non-vm */ | 272 | /* deal with non-vm */ |
256 | if ((p->chunk_ib_idx != -1) && | 273 | if ((p->chunk_ib_idx != -1) && |
@@ -264,11 +281,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) | |||
264 | p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); | 281 | p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); |
265 | p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); | 282 | p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); |
266 | if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL || | 283 | if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL || |
267 | p->chunks[p->chunk_ib_idx].kpage[1] == NULL) { | 284 | p->chunks[p->chunk_ib_idx].kpage[1] == NULL) |
268 | kfree(p->chunks[p->chunk_ib_idx].kpage[0]); | ||
269 | kfree(p->chunks[p->chunk_ib_idx].kpage[1]); | ||
270 | return -ENOMEM; | 285 | return -ENOMEM; |
271 | } | ||
272 | p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1; | 286 | p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1; |
273 | p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1; | 287 | p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1; |
274 | p->chunks[p->chunk_ib_idx].last_copied_page = -1; | 288 | p->chunks[p->chunk_ib_idx].last_copied_page = -1; |
@@ -341,7 +355,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, | |||
341 | return r; | 355 | return r; |
342 | } | 356 | } |
343 | parser->ib->length_dw = ib_chunk->length_dw; | 357 | parser->ib->length_dw = ib_chunk->length_dw; |
344 | r = radeon_cs_parse(parser); | 358 | r = radeon_cs_parse(rdev, parser->ring, parser); |
345 | if (r || parser->parser_error) { | 359 | if (r || parser->parser_error) { |
346 | DRM_ERROR("Invalid command stream !\n"); | 360 | DRM_ERROR("Invalid command stream !\n"); |
347 | return r; | 361 | return r; |
@@ -394,6 +408,32 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, | |||
394 | if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) | 408 | if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) |
395 | return 0; | 409 | return 0; |
396 | 410 | ||
411 | if ((rdev->family >= CHIP_TAHITI) && | ||
412 | (parser->chunk_const_ib_idx != -1)) { | ||
413 | ib_chunk = &parser->chunks[parser->chunk_const_ib_idx]; | ||
414 | if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { | ||
415 | DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw); | ||
416 | return -EINVAL; | ||
417 | } | ||
418 | r = radeon_ib_get(rdev, parser->ring, &parser->const_ib, | ||
419 | ib_chunk->length_dw * 4); | ||
420 | if (r) { | ||
421 | DRM_ERROR("Failed to get const ib !\n"); | ||
422 | return r; | ||
423 | } | ||
424 | parser->const_ib->is_const_ib = true; | ||
425 | parser->const_ib->length_dw = ib_chunk->length_dw; | ||
426 | /* Copy the packet into the IB */ | ||
427 | if (DRM_COPY_FROM_USER(parser->const_ib->ptr, ib_chunk->user_ptr, | ||
428 | ib_chunk->length_dw * 4)) { | ||
429 | return -EFAULT; | ||
430 | } | ||
431 | r = radeon_ring_ib_parse(rdev, parser->ring, parser->const_ib); | ||
432 | if (r) { | ||
433 | return r; | ||
434 | } | ||
435 | } | ||
436 | |||
397 | ib_chunk = &parser->chunks[parser->chunk_ib_idx]; | 437 | ib_chunk = &parser->chunks[parser->chunk_ib_idx]; |
398 | if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { | 438 | if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { |
399 | DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw); | 439 | DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw); |
@@ -429,11 +469,25 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, | |||
429 | if (r) { | 469 | if (r) { |
430 | DRM_ERROR("Failed to synchronize rings !\n"); | 470 | DRM_ERROR("Failed to synchronize rings !\n"); |
431 | } | 471 | } |
472 | |||
473 | if ((rdev->family >= CHIP_TAHITI) && | ||
474 | (parser->chunk_const_ib_idx != -1)) { | ||
475 | parser->const_ib->vm_id = vm->id; | ||
476 | /* ib pool is bind at 0 in virtual address space to gpu_addr is the | ||
477 | * offset inside the pool bo | ||
478 | */ | ||
479 | parser->const_ib->gpu_addr = parser->const_ib->sa_bo.offset; | ||
480 | r = radeon_ib_schedule(rdev, parser->const_ib); | ||
481 | if (r) | ||
482 | goto out; | ||
483 | } | ||
484 | |||
432 | parser->ib->vm_id = vm->id; | 485 | parser->ib->vm_id = vm->id; |
433 | /* ib pool is bind at 0 in virtual address space to gpu_addr is the | 486 | /* ib pool is bind at 0 in virtual address space to gpu_addr is the |
434 | * offset inside the pool bo | 487 | * offset inside the pool bo |
435 | */ | 488 | */ |
436 | parser->ib->gpu_addr = parser->ib->sa_bo.offset; | 489 | parser->ib->gpu_addr = parser->ib->sa_bo.offset; |
490 | parser->ib->is_const_ib = false; | ||
437 | r = radeon_ib_schedule(rdev, parser->ib); | 491 | r = radeon_ib_schedule(rdev, parser->ib); |
438 | out: | 492 | out: |
439 | if (!r) { | 493 | if (!r) { |