aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2014-11-19 08:01:25 -0500
committerAlex Deucher <alexander.deucher@amd.com>2014-11-20 13:00:18 -0500
commit7c42bc1aa23fc061a6ff9c2bd9208817bd54ea04 (patch)
tree118a08edc96cea488f6eb9ceef7922b0068dbd3b
parentad1a58a45ae9a35aaf68b27d21600889360680f3 (diff)
drm/radeon: use one VMID for each ring
Use multiple VMIDs for each VM, one for each ring. That allows us to execute flushes separately on each ring. This is still not ideal, because in a lot of cases rings can share IDs. Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/radeon/cik.c4
-rw-r--r--drivers/gpu/drm/radeon/cik_sdma.c2
-rw-r--r--drivers/gpu/drm/radeon/ni.c6
-rw-r--r--drivers/gpu/drm/radeon/ni_dma.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon.h36
-rw-r--r--drivers/gpu/drm/radeon/radeon_vm.c59
-rw-r--r--drivers/gpu/drm/radeon/si.c6
7 files changed, 68 insertions, 48 deletions
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 755923bc6786..3deeed33322f 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4066,6 +4066,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4066void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 4066void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4067{ 4067{
4068 struct radeon_ring *ring = &rdev->ring[ib->ring]; 4068 struct radeon_ring *ring = &rdev->ring[ib->ring];
4069 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4069 u32 header, control = INDIRECT_BUFFER_VALID; 4070 u32 header, control = INDIRECT_BUFFER_VALID;
4070 4071
4071 if (ib->is_const_ib) { 4072 if (ib->is_const_ib) {
@@ -4094,8 +4095,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4094 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4095 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4095 } 4096 }
4096 4097
4097 control |= ib->length_dw | 4098 control |= ib->length_dw | (vm_id << 24);
4098 (ib->vm ? (ib->vm->id << 24) : 0);
4099 4099
4100 radeon_ring_write(ring, header); 4100 radeon_ring_write(ring, header);
4101 radeon_ring_write(ring, 4101 radeon_ring_write(ring,
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 604e2e770951..54b98379188d 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
134 struct radeon_ib *ib) 134 struct radeon_ib *ib)
135{ 135{
136 struct radeon_ring *ring = &rdev->ring[ib->ring]; 136 struct radeon_ring *ring = &rdev->ring[ib->ring];
137 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; 137 u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
138 138
139 if (rdev->wb.enabled) { 139 if (rdev->wb.enabled) {
140 u32 next_rptr = ring->wptr + 5; 140 u32 next_rptr = ring->wptr + 5;
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index bee432d3dd30..360de9f1f491 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
1373void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 1373void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1374{ 1374{
1375 struct radeon_ring *ring = &rdev->ring[ib->ring]; 1375 struct radeon_ring *ring = &rdev->ring[ib->ring];
1376 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
1376 u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | 1377 u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
1377 PACKET3_SH_ACTION_ENA; 1378 PACKET3_SH_ACTION_ENA;
1378 1379
@@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1395#endif 1396#endif
1396 (ib->gpu_addr & 0xFFFFFFFC)); 1397 (ib->gpu_addr & 0xFFFFFFFC));
1397 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); 1398 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
1398 radeon_ring_write(ring, ib->length_dw | 1399 radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
1399 (ib->vm ? (ib->vm->id << 24) : 0));
1400 1400
1401 /* flush read cache over gart for this vmid */ 1401 /* flush read cache over gart for this vmid */
1402 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); 1402 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1403 radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); 1403 radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
1404 radeon_ring_write(ring, 0xFFFFFFFF); 1404 radeon_ring_write(ring, 0xFFFFFFFF);
1405 radeon_ring_write(ring, 0); 1405 radeon_ring_write(ring, 0);
1406 radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ 1406 radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
1407} 1407}
1408 1408
1409static void cayman_cp_enable(struct radeon_device *rdev, bool enable) 1409static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index 5a72404c9d5e..50f88611ff60 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
123 struct radeon_ib *ib) 123 struct radeon_ib *ib)
124{ 124{
125 struct radeon_ring *ring = &rdev->ring[ib->ring]; 125 struct radeon_ring *ring = &rdev->ring[ib->ring];
126 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
126 127
127 if (rdev->wb.enabled) { 128 if (rdev->wb.enabled) {
128 u32 next_rptr = ring->wptr + 4; 129 u32 next_rptr = ring->wptr + 4;
@@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
140 */ 141 */
141 while ((ring->wptr & 7) != 5) 142 while ((ring->wptr & 7) != 5)
142 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); 143 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
143 radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); 144 radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
144 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 145 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
145 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 146 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
146 147
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 61b2eeabf7a4..79f5f5bf4c0c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -905,33 +905,39 @@ struct radeon_vm_pt {
905 uint64_t addr; 905 uint64_t addr;
906}; 906};
907 907
908struct radeon_vm_id {
909 unsigned id;
910 uint64_t pd_gpu_addr;
911 /* last flushed PD/PT update */
912 struct radeon_fence *flushed_updates;
913 /* last use of vmid */
914 struct radeon_fence *last_id_use;
915};
916
908struct radeon_vm { 917struct radeon_vm {
909 struct rb_root va; 918 struct rb_root va;
910 unsigned id;
911 919
912 /* BOs moved, but not yet updated in the PT */ 920 /* BOs moved, but not yet updated in the PT */
913 struct list_head invalidated; 921 struct list_head invalidated;
914 922
915 /* BOs freed, but not yet updated in the PT */ 923 /* BOs freed, but not yet updated in the PT */
916 struct list_head freed; 924 struct list_head freed;
917 925
918 /* contains the page directory */ 926 /* contains the page directory */
919 struct radeon_bo *page_directory; 927 struct radeon_bo *page_directory;
920 uint64_t pd_gpu_addr; 928 unsigned max_pde_used;
921 unsigned max_pde_used;
922 929
923 /* array of page tables, one for each page directory entry */ 930 /* array of page tables, one for each page directory entry */
924 struct radeon_vm_pt *page_tables; 931 struct radeon_vm_pt *page_tables;
925 932
926 struct radeon_bo_va *ib_bo_va; 933 struct radeon_bo_va *ib_bo_va;
927 934
928 struct mutex mutex; 935 struct mutex mutex;
929 /* last fence for cs using this vm */ 936 /* last fence for cs using this vm */
930 struct radeon_fence *fence; 937 struct radeon_fence *fence;
931 /* last flushed PD/PT update */ 938
932 struct radeon_fence *flushed_updates; 939 /* for id and flush management per ring */
933 /* last use of vmid */ 940 struct radeon_vm_id ids[RADEON_NUM_RINGS];
934 struct radeon_fence *last_id_use;
935}; 941};
936 942
937struct radeon_vm_manager { 943struct radeon_vm_manager {
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 6ff5741ea403..e38efe4962f3 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -182,15 +182,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
182 struct radeon_vm *vm, int ring) 182 struct radeon_vm *vm, int ring)
183{ 183{
184 struct radeon_fence *best[RADEON_NUM_RINGS] = {}; 184 struct radeon_fence *best[RADEON_NUM_RINGS] = {};
185 struct radeon_vm_id *vm_id = &vm->ids[ring];
186
185 unsigned choices[2] = {}; 187 unsigned choices[2] = {};
186 unsigned i; 188 unsigned i;
187 189
188 /* check if the id is still valid */ 190 /* check if the id is still valid */
189 if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) 191 if (vm_id->id && vm_id->last_id_use &&
192 vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
190 return NULL; 193 return NULL;
191 194
192 /* we definately need to flush */ 195 /* we definately need to flush */
193 vm->pd_gpu_addr = ~0ll; 196 vm_id->pd_gpu_addr = ~0ll;
194 197
195 /* skip over VMID 0, since it is the system VM */ 198 /* skip over VMID 0, since it is the system VM */
196 for (i = 1; i < rdev->vm_manager.nvm; ++i) { 199 for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@@ -198,8 +201,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
198 201
199 if (fence == NULL) { 202 if (fence == NULL) {
200 /* found a free one */ 203 /* found a free one */
201 vm->id = i; 204 vm_id->id = i;
202 trace_radeon_vm_grab_id(vm->id, ring); 205 trace_radeon_vm_grab_id(i, ring);
203 return NULL; 206 return NULL;
204 } 207 }
205 208
@@ -211,8 +214,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
211 214
212 for (i = 0; i < 2; ++i) { 215 for (i = 0; i < 2; ++i) {
213 if (choices[i]) { 216 if (choices[i]) {
214 vm->id = choices[i]; 217 vm_id->id = choices[i];
215 trace_radeon_vm_grab_id(vm->id, ring); 218 trace_radeon_vm_grab_id(choices[i], ring);
216 return rdev->vm_manager.active[choices[i]]; 219 return rdev->vm_manager.active[choices[i]];
217 } 220 }
218 } 221 }
@@ -239,16 +242,18 @@ void radeon_vm_flush(struct radeon_device *rdev,
239 int ring, struct radeon_fence *updates) 242 int ring, struct radeon_fence *updates)
240{ 243{
241 uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); 244 uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
245 struct radeon_vm_id *vm_id = &vm->ids[ring];
242 246
243 if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates || 247 if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
244 radeon_fence_is_earlier(vm->flushed_updates, updates)) { 248 radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
245 249
246 trace_radeon_vm_flush(pd_addr, ring, vm->id); 250 trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
247 radeon_fence_unref(&vm->flushed_updates); 251 radeon_fence_unref(&vm_id->flushed_updates);
248 vm->flushed_updates = radeon_fence_ref(updates); 252 vm_id->flushed_updates = radeon_fence_ref(updates);
249 vm->pd_gpu_addr = pd_addr; 253 vm_id->pd_gpu_addr = pd_addr;
250 radeon_ring_vm_flush(rdev, &rdev->ring[ring], 254 radeon_ring_vm_flush(rdev, &rdev->ring[ring],
251 vm->id, vm->pd_gpu_addr); 255 vm_id->id, vm_id->pd_gpu_addr);
256
252 } 257 }
253} 258}
254 259
@@ -268,14 +273,16 @@ void radeon_vm_fence(struct radeon_device *rdev,
268 struct radeon_vm *vm, 273 struct radeon_vm *vm,
269 struct radeon_fence *fence) 274 struct radeon_fence *fence)
270{ 275{
276 unsigned vm_id = vm->ids[fence->ring].id;
277
271 radeon_fence_unref(&vm->fence); 278 radeon_fence_unref(&vm->fence);
272 vm->fence = radeon_fence_ref(fence); 279 vm->fence = radeon_fence_ref(fence);
273 280
274 radeon_fence_unref(&rdev->vm_manager.active[vm->id]); 281 radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
275 rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); 282 rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
276 283
277 radeon_fence_unref(&vm->last_id_use); 284 radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
278 vm->last_id_use = radeon_fence_ref(fence); 285 vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
279} 286}
280 287
281/** 288/**
@@ -1120,13 +1127,16 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
1120 const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, 1127 const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
1121 RADEON_VM_PTE_COUNT * 8); 1128 RADEON_VM_PTE_COUNT * 8);
1122 unsigned pd_size, pd_entries, pts_size; 1129 unsigned pd_size, pd_entries, pts_size;
1123 int r; 1130 int i, r;
1124 1131
1125 vm->id = 0;
1126 vm->ib_bo_va = NULL; 1132 vm->ib_bo_va = NULL;
1127 vm->fence = NULL; 1133 vm->fence = NULL;
1128 vm->flushed_updates = NULL; 1134
1129 vm->last_id_use = NULL; 1135 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
1136 vm->ids[i].id = 0;
1137 vm->ids[i].flushed_updates = NULL;
1138 vm->ids[i].last_id_use = NULL;
1139 }
1130 mutex_init(&vm->mutex); 1140 mutex_init(&vm->mutex);
1131 vm->va = RB_ROOT; 1141 vm->va = RB_ROOT;
1132 INIT_LIST_HEAD(&vm->invalidated); 1142 INIT_LIST_HEAD(&vm->invalidated);
@@ -1197,8 +1207,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
1197 radeon_bo_unref(&vm->page_directory); 1207 radeon_bo_unref(&vm->page_directory);
1198 1208
1199 radeon_fence_unref(&vm->fence); 1209 radeon_fence_unref(&vm->fence);
1200 radeon_fence_unref(&vm->flushed_updates); 1210
1201 radeon_fence_unref(&vm->last_id_use); 1211 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
1212 radeon_fence_unref(&vm->ids[i].flushed_updates);
1213 radeon_fence_unref(&vm->ids[i].last_id_use);
1214 }
1202 1215
1203 mutex_destroy(&vm->mutex); 1216 mutex_destroy(&vm->mutex);
1204} 1217}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index e91968b04154..14896ce76324 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3362,6 +3362,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
3362void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 3362void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3363{ 3363{
3364 struct radeon_ring *ring = &rdev->ring[ib->ring]; 3364 struct radeon_ring *ring = &rdev->ring[ib->ring];
3365 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3365 u32 header; 3366 u32 header;
3366 3367
3367 if (ib->is_const_ib) { 3368 if (ib->is_const_ib) {
@@ -3397,14 +3398,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3397#endif 3398#endif
3398 (ib->gpu_addr & 0xFFFFFFFC)); 3399 (ib->gpu_addr & 0xFFFFFFFC));
3399 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 3400 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3400 radeon_ring_write(ring, ib->length_dw | 3401 radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3401 (ib->vm ? (ib->vm->id << 24) : 0));
3402 3402
3403 if (!ib->is_const_ib) { 3403 if (!ib->is_const_ib) {
3404 /* flush read cache over gart for this vmid */ 3404 /* flush read cache over gart for this vmid */
3405 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 3405 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3406 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); 3406 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3407 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0); 3407 radeon_ring_write(ring, vm_id);
3408 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); 3408 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3409 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA | 3409 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3410 PACKET3_TC_ACTION_ENA | 3410 PACKET3_TC_ACTION_ENA |