author | Dave Airlie <airlied@redhat.com> | 2015-08-26 23:00:28 -0400
committer | Dave Airlie <airlied@redhat.com> | 2015-08-26 23:00:28 -0400
commit | 40b2dffbcc67e92d5df97785dffc68fe88605bfa (patch)
tree | 91276b6ae4210791ad4494adaf69a56b16c7b0ac
parent | db56176025cee5e242dfeed5f4e304d095d29fa3 (diff)
parent | c2b6bd7e91aad8440a2f55bdbde6f5a8ae19fac5 (diff)
Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
- DP fixes for radeon and amdgpu
- IH ring fix for tonga and fiji
- Lots of GPU scheduler fixes
- Misc additional fixes
* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (42 commits)
drm/amdgpu: fix wait queue handling in the scheduler
drm/amdgpu: remove extra parameters from scheduler callbacks
drm/amdgpu: wake up scheduler only when neccessary
drm/amdgpu: remove entity idle timeout v2
drm/amdgpu: fix postclose order
drm/amdgpu: use IB for copy buffer of eviction
drm/amdgpu: adjust the judgement of removing fence callback
drm/amdgpu: fix no sync_wait in copy_buffer
drm/amdgpu: fix last_vm_update fence is not effetive for sched fence
drm/amdgpu: add priv data to sched
drm/amdgpu: add owner for sched fence
drm/amdgpu: remove entity reference from sched fence
drm/amdgpu: fix and cleanup amd_sched_entity_push_job
drm/amdgpu: remove amdgpu_bo_list_clone
drm/amdgpu: remove the context from amdgpu_job
drm/amdgpu: remove unused parameters to amd_sched_create
drm/amdgpu: remove sched_lock
drm/amdgpu: remove prepare_job callback
drm/amdgpu: cleanup a scheduler function name
drm/amdgpu: reorder scheduler functions
...
30 files changed, 671 insertions, 727 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2fc58e658986..aa2dcf578dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -183,6 +183,7 @@ struct amdgpu_vm; | |||
183 | struct amdgpu_ring; | 183 | struct amdgpu_ring; |
184 | struct amdgpu_semaphore; | 184 | struct amdgpu_semaphore; |
185 | struct amdgpu_cs_parser; | 185 | struct amdgpu_cs_parser; |
186 | struct amdgpu_job; | ||
186 | struct amdgpu_irq_src; | 187 | struct amdgpu_irq_src; |
187 | struct amdgpu_fpriv; | 188 | struct amdgpu_fpriv; |
188 | 189 | ||
@@ -246,7 +247,7 @@ struct amdgpu_buffer_funcs { | |||
246 | unsigned copy_num_dw; | 247 | unsigned copy_num_dw; |
247 | 248 | ||
248 | /* used for buffer migration */ | 249 | /* used for buffer migration */ |
249 | void (*emit_copy_buffer)(struct amdgpu_ring *ring, | 250 | void (*emit_copy_buffer)(struct amdgpu_ib *ib, |
250 | /* src addr in bytes */ | 251 | /* src addr in bytes */ |
251 | uint64_t src_offset, | 252 | uint64_t src_offset, |
252 | /* dst addr in bytes */ | 253 | /* dst addr in bytes */ |
@@ -439,9 +440,12 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring); | |||
439 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); | 440 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); |
440 | unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); | 441 | unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); |
441 | 442 | ||
442 | signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, | 443 | signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, |
443 | struct amdgpu_fence **fences, | 444 | struct fence **array, |
444 | bool intr, long t); | 445 | uint32_t count, |
446 | bool wait_all, | ||
447 | bool intr, | ||
448 | signed long t); | ||
445 | struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); | 449 | struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); |
446 | void amdgpu_fence_unref(struct amdgpu_fence **fence); | 450 | void amdgpu_fence_unref(struct amdgpu_fence **fence); |
447 | 451 | ||
@@ -514,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, | |||
514 | uint64_t dst_offset, | 518 | uint64_t dst_offset, |
515 | uint32_t byte_count, | 519 | uint32_t byte_count, |
516 | struct reservation_object *resv, | 520 | struct reservation_object *resv, |
517 | struct amdgpu_fence **fence); | 521 | struct fence **fence); |
518 | int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); | 522 | int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); |
519 | 523 | ||
520 | struct amdgpu_bo_list_entry { | 524 | struct amdgpu_bo_list_entry { |
@@ -650,7 +654,7 @@ struct amdgpu_sa_bo { | |||
650 | struct amdgpu_sa_manager *manager; | 654 | struct amdgpu_sa_manager *manager; |
651 | unsigned soffset; | 655 | unsigned soffset; |
652 | unsigned eoffset; | 656 | unsigned eoffset; |
653 | struct amdgpu_fence *fence; | 657 | struct fence *fence; |
654 | }; | 658 | }; |
655 | 659 | ||
656 | /* | 660 | /* |
@@ -692,7 +696,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, | |||
692 | struct amdgpu_semaphore *semaphore); | 696 | struct amdgpu_semaphore *semaphore); |
693 | void amdgpu_semaphore_free(struct amdgpu_device *adev, | 697 | void amdgpu_semaphore_free(struct amdgpu_device *adev, |
694 | struct amdgpu_semaphore **semaphore, | 698 | struct amdgpu_semaphore **semaphore, |
695 | struct amdgpu_fence *fence); | 699 | struct fence *fence); |
696 | 700 | ||
697 | /* | 701 | /* |
698 | * Synchronization | 702 | * Synchronization |
@@ -700,7 +704,8 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev, | |||
700 | struct amdgpu_sync { | 704 | struct amdgpu_sync { |
701 | struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; | 705 | struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; |
702 | struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS]; | 706 | struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS]; |
703 | struct amdgpu_fence *last_vm_update; | 707 | DECLARE_HASHTABLE(fences, 4); |
708 | struct fence *last_vm_update; | ||
704 | }; | 709 | }; |
705 | 710 | ||
706 | void amdgpu_sync_create(struct amdgpu_sync *sync); | 711 | void amdgpu_sync_create(struct amdgpu_sync *sync); |
@@ -712,8 +717,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, | |||
712 | void *owner); | 717 | void *owner); |
713 | int amdgpu_sync_rings(struct amdgpu_sync *sync, | 718 | int amdgpu_sync_rings(struct amdgpu_sync *sync, |
714 | struct amdgpu_ring *ring); | 719 | struct amdgpu_ring *ring); |
720 | int amdgpu_sync_wait(struct amdgpu_sync *sync); | ||
715 | void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, | 721 | void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, |
716 | struct amdgpu_fence *fence); | 722 | struct fence *fence); |
717 | 723 | ||
718 | /* | 724 | /* |
719 | * GART structures, functions & helpers | 725 | * GART structures, functions & helpers |
@@ -871,7 +877,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, | |||
871 | struct amdgpu_ring *ring, | 877 | struct amdgpu_ring *ring, |
872 | struct amdgpu_ib *ibs, | 878 | struct amdgpu_ib *ibs, |
873 | unsigned num_ibs, | 879 | unsigned num_ibs, |
874 | int (*free_job)(struct amdgpu_cs_parser *), | 880 | int (*free_job)(struct amdgpu_job *), |
875 | void *owner, | 881 | void *owner, |
876 | struct fence **fence); | 882 | struct fence **fence); |
877 | 883 | ||
@@ -957,7 +963,7 @@ struct amdgpu_vm_id { | |||
957 | unsigned id; | 963 | unsigned id; |
958 | uint64_t pd_gpu_addr; | 964 | uint64_t pd_gpu_addr; |
959 | /* last flushed PD/PT update */ | 965 | /* last flushed PD/PT update */ |
960 | struct amdgpu_fence *flushed_updates; | 966 | struct fence *flushed_updates; |
961 | /* last use of vmid */ | 967 | /* last use of vmid */ |
962 | struct amdgpu_fence *last_id_use; | 968 | struct amdgpu_fence *last_id_use; |
963 | }; | 969 | }; |
@@ -1042,7 +1048,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); | |||
1042 | int amdgpu_ctx_put(struct amdgpu_ctx *ctx); | 1048 | int amdgpu_ctx_put(struct amdgpu_ctx *ctx); |
1043 | 1049 | ||
1044 | uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | 1050 | uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, |
1045 | struct fence *fence, uint64_t queued_seq); | 1051 | struct fence *fence); |
1046 | struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, | 1052 | struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, |
1047 | struct amdgpu_ring *ring, uint64_t seq); | 1053 | struct amdgpu_ring *ring, uint64_t seq); |
1048 | 1054 | ||
@@ -1078,8 +1084,6 @@ struct amdgpu_bo_list { | |||
1078 | }; | 1084 | }; |
1079 | 1085 | ||
1080 | struct amdgpu_bo_list * | 1086 | struct amdgpu_bo_list * |
1081 | amdgpu_bo_list_clone(struct amdgpu_bo_list *list); | ||
1082 | struct amdgpu_bo_list * | ||
1083 | amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); | 1087 | amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); |
1084 | void amdgpu_bo_list_put(struct amdgpu_bo_list *list); | 1088 | void amdgpu_bo_list_put(struct amdgpu_bo_list *list); |
1085 | void amdgpu_bo_list_free(struct amdgpu_bo_list *list); | 1089 | void amdgpu_bo_list_free(struct amdgpu_bo_list *list); |
@@ -1255,14 +1259,16 @@ struct amdgpu_cs_parser { | |||
1255 | 1259 | ||
1256 | /* user fence */ | 1260 | /* user fence */ |
1257 | struct amdgpu_user_fence uf; | 1261 | struct amdgpu_user_fence uf; |
1262 | }; | ||
1258 | 1263 | ||
1259 | struct amdgpu_ring *ring; | 1264 | struct amdgpu_job { |
1260 | struct mutex job_lock; | 1265 | struct amd_sched_job base; |
1261 | struct work_struct job_work; | 1266 | struct amdgpu_device *adev; |
1262 | int (*prepare_job)(struct amdgpu_cs_parser *sched_job); | 1267 | struct amdgpu_ib *ibs; |
1263 | int (*run_job)(struct amdgpu_cs_parser *sched_job); | 1268 | uint32_t num_ibs; |
1264 | int (*free_job)(struct amdgpu_cs_parser *sched_job); | 1269 | struct mutex job_lock; |
1265 | struct amd_sched_fence *s_fence; | 1270 | struct amdgpu_user_fence uf; |
1271 | int (*free_job)(struct amdgpu_job *sched_job); | ||
1266 | }; | 1272 | }; |
1267 | 1273 | ||
1268 | static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) | 1274 | static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) |
@@ -2241,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) | |||
2241 | #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) | 2247 | #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) |
2242 | #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) | 2248 | #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) |
2243 | #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) | 2249 | #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) |
2244 | #define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b)) | 2250 | #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) |
2245 | #define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) | 2251 | #define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) |
2246 | #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) | 2252 | #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) |
2247 | #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) | 2253 | #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) |
@@ -2343,7 +2349,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | |||
2343 | struct amdgpu_sync *sync); | 2349 | struct amdgpu_sync *sync); |
2344 | void amdgpu_vm_flush(struct amdgpu_ring *ring, | 2350 | void amdgpu_vm_flush(struct amdgpu_ring *ring, |
2345 | struct amdgpu_vm *vm, | 2351 | struct amdgpu_vm *vm, |
2346 | struct amdgpu_fence *updates); | 2352 | struct fence *updates); |
2347 | void amdgpu_vm_fence(struct amdgpu_device *adev, | 2353 | void amdgpu_vm_fence(struct amdgpu_device *adev, |
2348 | struct amdgpu_vm *vm, | 2354 | struct amdgpu_vm *vm, |
2349 | struct amdgpu_fence *fence); | 2355 | struct amdgpu_fence *fence); |
@@ -2373,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
2373 | uint64_t addr); | 2379 | uint64_t addr); |
2374 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | 2380 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, |
2375 | struct amdgpu_bo_va *bo_va); | 2381 | struct amdgpu_bo_va *bo_va); |
2376 | 2382 | int amdgpu_vm_free_job(struct amdgpu_job *job); | |
2377 | /* | 2383 | /* |
2378 | * functions used by amdgpu_encoder.c | 2384 | * functions used by amdgpu_encoder.c |
2379 | */ | 2385 | */ |
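The header changes above replace `amdgpu_fence_wait_any()` with the more general `amdgpu_fence_wait_multiple()`, which takes a plain `struct fence` array plus a count, a wait_all flag, and an interruptible flag. A minimal caller sketch, assuming only the prototype declared in the hunk above; `example_wait_any()` is illustrative and not part of the driver:

```c
/* Illustrative only: wait until either of two fences signals.
 * Assumes the amdgpu_fence_wait_multiple() prototype shown above. */
static int example_wait_any(struct amdgpu_device *adev,
			    struct fence *a, struct fence *b)
{
	struct fence *fences[2] = { a, b };
	signed long t;

	t = amdgpu_fence_wait_multiple(adev, fences, 2,
				       false, /* wait_all: any fence is enough */
				       true,  /* intr: sleep interruptibly */
				       MAX_SCHEDULE_TIMEOUT);
	return (t > 0) ? 0 : t;
}
```

Passing `wait_all = true` instead makes the same call block until every non-NULL fence in the array has signaled, which is what the new `amdgpu_test_signaled_all()` helper further down implements.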
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 759482e4300d..98d59ee640ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, | |||
33 | { | 33 | { |
34 | unsigned long start_jiffies; | 34 | unsigned long start_jiffies; |
35 | unsigned long end_jiffies; | 35 | unsigned long end_jiffies; |
36 | struct amdgpu_fence *fence = NULL; | 36 | struct fence *fence = NULL; |
37 | int i, r; | 37 | int i, r; |
38 | 38 | ||
39 | start_jiffies = jiffies; | 39 | start_jiffies = jiffies; |
@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, | |||
42 | r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence); | 42 | r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence); |
43 | if (r) | 43 | if (r) |
44 | goto exit_do_move; | 44 | goto exit_do_move; |
45 | r = fence_wait(&fence->base, false); | 45 | r = fence_wait(fence, false); |
46 | if (r) | 46 | if (r) |
47 | goto exit_do_move; | 47 | goto exit_do_move; |
48 | amdgpu_fence_unref(&fence); | 48 | fence_put(fence); |
49 | } | 49 | } |
50 | end_jiffies = jiffies; | 50 | end_jiffies = jiffies; |
51 | r = jiffies_to_msecs(end_jiffies - start_jiffies); | 51 | r = jiffies_to_msecs(end_jiffies - start_jiffies); |
52 | 52 | ||
53 | exit_do_move: | 53 | exit_do_move: |
54 | if (fence) | 54 | if (fence) |
55 | amdgpu_fence_unref(&fence); | 55 | fence_put(fence); |
56 | return r; | 56 | return r; |
57 | } | 57 | } |
58 | 58 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7eed523bf28f..f82a2dd83874 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -62,39 +62,6 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, | |||
62 | return 0; | 62 | return 0; |
63 | } | 63 | } |
64 | 64 | ||
65 | struct amdgpu_bo_list * | ||
66 | amdgpu_bo_list_clone(struct amdgpu_bo_list *list) | ||
67 | { | ||
68 | struct amdgpu_bo_list *result; | ||
69 | unsigned i; | ||
70 | |||
71 | result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); | ||
72 | if (!result) | ||
73 | return NULL; | ||
74 | |||
75 | result->array = drm_calloc_large(list->num_entries, | ||
76 | sizeof(struct amdgpu_bo_list_entry)); | ||
77 | if (!result->array) { | ||
78 | kfree(result); | ||
79 | return NULL; | ||
80 | } | ||
81 | |||
82 | mutex_init(&result->lock); | ||
83 | result->gds_obj = list->gds_obj; | ||
84 | result->gws_obj = list->gws_obj; | ||
85 | result->oa_obj = list->oa_obj; | ||
86 | result->has_userptr = list->has_userptr; | ||
87 | result->num_entries = list->num_entries; | ||
88 | |||
89 | memcpy(result->array, list->array, list->num_entries * | ||
90 | sizeof(struct amdgpu_bo_list_entry)); | ||
91 | |||
92 | for (i = 0; i < result->num_entries; ++i) | ||
93 | amdgpu_bo_ref(result->array[i].robj); | ||
94 | |||
95 | return result; | ||
96 | } | ||
97 | |||
98 | static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) | 65 | static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) |
99 | { | 66 | { |
100 | struct amdgpu_bo_list *list; | 67 | struct amdgpu_bo_list *list; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 27df17a0e620..89c3dd62ba21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -75,6 +75,11 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) | |||
75 | if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { | 75 | if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { |
76 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); | 76 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); |
77 | } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { | 77 | } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { |
78 | /* Don't try to start link training before we | ||
79 | * have the dpcd */ | ||
80 | if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) | ||
81 | return; | ||
82 | |||
78 | /* set it to OFF so that drm_helper_connector_dpms() | 83 | /* set it to OFF so that drm_helper_connector_dpms() |
79 | * won't return immediately since the current state | 84 | * won't return immediately since the current state |
80 | * is ON at this point. | 85 | * is ON at this point. |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e4424b4db5d3..6a206f15635f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -126,19 +126,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, | |||
126 | return 0; | 126 | return 0; |
127 | } | 127 | } |
128 | 128 | ||
129 | static void amdgpu_job_work_func(struct work_struct *work) | ||
130 | { | ||
131 | struct amdgpu_cs_parser *sched_job = | ||
132 | container_of(work, struct amdgpu_cs_parser, | ||
133 | job_work); | ||
134 | mutex_lock(&sched_job->job_lock); | ||
135 | if (sched_job->free_job) | ||
136 | sched_job->free_job(sched_job); | ||
137 | mutex_unlock(&sched_job->job_lock); | ||
138 | /* after processing job, free memory */ | ||
139 | fence_put(&sched_job->s_fence->base); | ||
140 | kfree(sched_job); | ||
141 | } | ||
142 | struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, | 129 | struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, |
143 | struct drm_file *filp, | 130 | struct drm_file *filp, |
144 | struct amdgpu_ctx *ctx, | 131 | struct amdgpu_ctx *ctx, |
@@ -157,10 +144,6 @@ struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, | |||
157 | parser->ctx = ctx; | 144 | parser->ctx = ctx; |
158 | parser->ibs = ibs; | 145 | parser->ibs = ibs; |
159 | parser->num_ibs = num_ibs; | 146 | parser->num_ibs = num_ibs; |
160 | if (amdgpu_enable_scheduler) { | ||
161 | mutex_init(&parser->job_lock); | ||
162 | INIT_WORK(&parser->job_work, amdgpu_job_work_func); | ||
163 | } | ||
164 | for (i = 0; i < num_ibs; i++) | 147 | for (i = 0; i < num_ibs; i++) |
165 | ibs[i].ctx = ctx; | 148 | ibs[i].ctx = ctx; |
166 | 149 | ||
@@ -173,7 +156,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
173 | uint64_t *chunk_array_user; | 156 | uint64_t *chunk_array_user; |
174 | uint64_t *chunk_array = NULL; | 157 | uint64_t *chunk_array = NULL; |
175 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 158 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
176 | struct amdgpu_bo_list *bo_list = NULL; | ||
177 | unsigned size, i; | 159 | unsigned size, i; |
178 | int r = 0; | 160 | int r = 0; |
179 | 161 | ||
@@ -185,20 +167,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
185 | r = -EINVAL; | 167 | r = -EINVAL; |
186 | goto out; | 168 | goto out; |
187 | } | 169 | } |
188 | bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); | 170 | p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); |
189 | if (!amdgpu_enable_scheduler) | ||
190 | p->bo_list = bo_list; | ||
191 | else { | ||
192 | if (bo_list && !bo_list->has_userptr) { | ||
193 | p->bo_list = amdgpu_bo_list_clone(bo_list); | ||
194 | amdgpu_bo_list_put(bo_list); | ||
195 | if (!p->bo_list) | ||
196 | return -ENOMEM; | ||
197 | } else if (bo_list && bo_list->has_userptr) | ||
198 | p->bo_list = bo_list; | ||
199 | else | ||
200 | p->bo_list = NULL; | ||
201 | } | ||
202 | 171 | ||
203 | /* get chunks */ | 172 | /* get chunks */ |
204 | INIT_LIST_HEAD(&p->validated); | 173 | INIT_LIST_HEAD(&p->validated); |
@@ -291,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
291 | } | 260 | } |
292 | 261 | ||
293 | 262 | ||
294 | p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); | 263 | p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); |
295 | if (!p->ibs) | 264 | if (!p->ibs) |
296 | r = -ENOMEM; | 265 | r = -ENOMEM; |
297 | 266 | ||
@@ -498,25 +467,24 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) | |||
498 | unsigned i; | 467 | unsigned i; |
499 | if (parser->ctx) | 468 | if (parser->ctx) |
500 | amdgpu_ctx_put(parser->ctx); | 469 | amdgpu_ctx_put(parser->ctx); |
501 | if (parser->bo_list) { | 470 | if (parser->bo_list) |
502 | if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr) | 471 | amdgpu_bo_list_put(parser->bo_list); |
503 | amdgpu_bo_list_free(parser->bo_list); | 472 | |
504 | else | ||
505 | amdgpu_bo_list_put(parser->bo_list); | ||
506 | } | ||
507 | drm_free_large(parser->vm_bos); | 473 | drm_free_large(parser->vm_bos); |
508 | for (i = 0; i < parser->nchunks; i++) | 474 | for (i = 0; i < parser->nchunks; i++) |
509 | drm_free_large(parser->chunks[i].kdata); | 475 | drm_free_large(parser->chunks[i].kdata); |
510 | kfree(parser->chunks); | 476 | kfree(parser->chunks); |
511 | if (parser->ibs) | ||
512 | for (i = 0; i < parser->num_ibs; i++) | ||
513 | amdgpu_ib_free(parser->adev, &parser->ibs[i]); | ||
514 | kfree(parser->ibs); | ||
515 | if (parser->uf.bo) | ||
516 | drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); | ||
517 | |||
518 | if (!amdgpu_enable_scheduler) | 477 | if (!amdgpu_enable_scheduler) |
519 | kfree(parser); | 478 | { |
479 | if (parser->ibs) | ||
480 | for (i = 0; i < parser->num_ibs; i++) | ||
481 | amdgpu_ib_free(parser->adev, &parser->ibs[i]); | ||
482 | kfree(parser->ibs); | ||
483 | if (parser->uf.bo) | ||
484 | drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); | ||
485 | } | ||
486 | |||
487 | kfree(parser); | ||
520 | } | 488 | } |
521 | 489 | ||
522 | /** | 490 | /** |
@@ -533,12 +501,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo | |||
533 | amdgpu_cs_parser_fini_late(parser); | 501 | amdgpu_cs_parser_fini_late(parser); |
534 | } | 502 | } |
535 | 503 | ||
536 | static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job) | ||
537 | { | ||
538 | amdgpu_cs_parser_fini_late(sched_job); | ||
539 | return 0; | ||
540 | } | ||
541 | |||
542 | static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, | 504 | static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, |
543 | struct amdgpu_vm *vm) | 505 | struct amdgpu_vm *vm) |
544 | { | 506 | { |
@@ -810,68 +772,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | |||
810 | return 0; | 772 | return 0; |
811 | } | 773 | } |
812 | 774 | ||
813 | static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) | 775 | static int amdgpu_cs_free_job(struct amdgpu_job *sched_job) |
814 | { | 776 | { |
815 | int r, i; | 777 | int i; |
816 | struct amdgpu_cs_parser *parser = sched_job; | 778 | if (sched_job->ibs) |
817 | struct amdgpu_device *adev = sched_job->adev; | 779 | for (i = 0; i < sched_job->num_ibs; i++) |
818 | bool reserved_buffers = false; | 780 | amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]); |
819 | 781 | kfree(sched_job->ibs); | |
820 | r = amdgpu_cs_parser_relocs(parser); | 782 | if (sched_job->uf.bo) |
821 | if (r) { | 783 | drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base); |
822 | if (r != -ERESTARTSYS) { | 784 | return 0; |
823 | if (r == -ENOMEM) | ||
824 | DRM_ERROR("Not enough memory for command submission!\n"); | ||
825 | else | ||
826 | DRM_ERROR("Failed to process the buffer list %d!\n", r); | ||
827 | } | ||
828 | } | ||
829 | |||
830 | if (!r) { | ||
831 | reserved_buffers = true; | ||
832 | r = amdgpu_cs_ib_fill(adev, parser); | ||
833 | } | ||
834 | if (!r) { | ||
835 | r = amdgpu_cs_dependencies(adev, parser); | ||
836 | if (r) | ||
837 | DRM_ERROR("Failed in the dependencies handling %d!\n", r); | ||
838 | } | ||
839 | if (r) { | ||
840 | amdgpu_cs_parser_fini(parser, r, reserved_buffers); | ||
841 | return r; | ||
842 | } | ||
843 | |||
844 | for (i = 0; i < parser->num_ibs; i++) | ||
845 | trace_amdgpu_cs(parser, i); | ||
846 | |||
847 | r = amdgpu_cs_ib_vm_chunk(adev, parser); | ||
848 | return r; | ||
849 | } | ||
850 | |||
851 | static struct amdgpu_ring *amdgpu_cs_parser_get_ring( | ||
852 | struct amdgpu_device *adev, | ||
853 | struct amdgpu_cs_parser *parser) | ||
854 | { | ||
855 | int i, r; | ||
856 | |||
857 | struct amdgpu_cs_chunk *chunk; | ||
858 | struct drm_amdgpu_cs_chunk_ib *chunk_ib; | ||
859 | struct amdgpu_ring *ring; | ||
860 | for (i = 0; i < parser->nchunks; i++) { | ||
861 | chunk = &parser->chunks[i]; | ||
862 | chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; | ||
863 | |||
864 | if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) | ||
865 | continue; | ||
866 | |||
867 | r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, | ||
868 | chunk_ib->ip_instance, chunk_ib->ring, | ||
869 | &ring); | ||
870 | if (r) | ||
871 | return NULL; | ||
872 | break; | ||
873 | } | ||
874 | return ring; | ||
875 | } | 785 | } |
876 | 786 | ||
877 | int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | 787 | int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) |
@@ -879,7 +789,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
879 | struct amdgpu_device *adev = dev->dev_private; | 789 | struct amdgpu_device *adev = dev->dev_private; |
880 | union drm_amdgpu_cs *cs = data; | 790 | union drm_amdgpu_cs *cs = data; |
881 | struct amdgpu_cs_parser *parser; | 791 | struct amdgpu_cs_parser *parser; |
882 | int r; | 792 | bool reserved_buffers = false; |
793 | int i, r; | ||
883 | 794 | ||
884 | down_read(&adev->exclusive_lock); | 795 | down_read(&adev->exclusive_lock); |
885 | if (!adev->accel_working) { | 796 | if (!adev->accel_working) { |
@@ -899,44 +810,79 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
899 | return r; | 810 | return r; |
900 | } | 811 | } |
901 | 812 | ||
902 | if (amdgpu_enable_scheduler && parser->num_ibs) { | 813 | r = amdgpu_cs_parser_relocs(parser); |
903 | struct amdgpu_ring * ring = | 814 | if (r == -ENOMEM) |
904 | amdgpu_cs_parser_get_ring(adev, parser); | 815 | DRM_ERROR("Not enough memory for command submission!\n"); |
905 | r = amdgpu_cs_parser_prepare_job(parser); | 816 | else if (r && r != -ERESTARTSYS) |
817 | DRM_ERROR("Failed to process the buffer list %d!\n", r); | ||
818 | else if (!r) { | ||
819 | reserved_buffers = true; | ||
820 | r = amdgpu_cs_ib_fill(adev, parser); | ||
821 | } | ||
822 | |||
823 | if (!r) { | ||
824 | r = amdgpu_cs_dependencies(adev, parser); | ||
906 | if (r) | 825 | if (r) |
907 | goto out; | 826 | DRM_ERROR("Failed in the dependencies handling %d!\n", r); |
908 | parser->ring = ring; | 827 | } |
909 | parser->free_job = amdgpu_cs_parser_free_job; | 828 | |
910 | mutex_lock(&parser->job_lock); | 829 | if (r) |
911 | r = amd_sched_push_job(ring->scheduler, | 830 | goto out; |
912 | &parser->ctx->rings[ring->idx].entity, | 831 | |
913 | parser, | 832 | for (i = 0; i < parser->num_ibs; i++) |
914 | &parser->s_fence); | 833 | trace_amdgpu_cs(parser, i); |
834 | |||
835 | r = amdgpu_cs_ib_vm_chunk(adev, parser); | ||
836 | if (r) | ||
837 | goto out; | ||
838 | |||
839 | if (amdgpu_enable_scheduler && parser->num_ibs) { | ||
840 | struct amdgpu_job *job; | ||
841 | struct amdgpu_ring * ring = parser->ibs->ring; | ||
842 | job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); | ||
843 | if (!job) | ||
844 | return -ENOMEM; | ||
845 | job->base.sched = ring->scheduler; | ||
846 | job->base.s_entity = &parser->ctx->rings[ring->idx].entity; | ||
847 | job->adev = parser->adev; | ||
848 | job->ibs = parser->ibs; | ||
849 | job->num_ibs = parser->num_ibs; | ||
850 | job->base.owner = parser->filp; | ||
851 | mutex_init(&job->job_lock); | ||
852 | if (job->ibs[job->num_ibs - 1].user) { | ||
853 | memcpy(&job->uf, &parser->uf, | ||
854 | sizeof(struct amdgpu_user_fence)); | ||
855 | job->ibs[job->num_ibs - 1].user = &job->uf; | ||
856 | } | ||
857 | |||
858 | job->free_job = amdgpu_cs_free_job; | ||
859 | mutex_lock(&job->job_lock); | ||
860 | r = amd_sched_entity_push_job((struct amd_sched_job *)job); | ||
915 | if (r) { | 861 | if (r) { |
916 | mutex_unlock(&parser->job_lock); | 862 | mutex_unlock(&job->job_lock); |
863 | amdgpu_cs_free_job(job); | ||
864 | kfree(job); | ||
917 | goto out; | 865 | goto out; |
918 | } | 866 | } |
919 | parser->ibs[parser->num_ibs - 1].sequence = | 867 | cs->out.handle = |
920 | amdgpu_ctx_add_fence(parser->ctx, ring, | 868 | amdgpu_ctx_add_fence(parser->ctx, ring, |
921 | &parser->s_fence->base, | 869 | &job->base.s_fence->base); |
922 | parser->s_fence->v_seq); | 870 | parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle; |
923 | cs->out.handle = parser->s_fence->v_seq; | 871 | |
924 | list_sort(NULL, &parser->validated, cmp_size_smaller_first); | 872 | list_sort(NULL, &parser->validated, cmp_size_smaller_first); |
925 | ttm_eu_fence_buffer_objects(&parser->ticket, | 873 | ttm_eu_fence_buffer_objects(&parser->ticket, |
926 | &parser->validated, | 874 | &parser->validated, |
927 | &parser->s_fence->base); | 875 | &job->base.s_fence->base); |
928 | 876 | ||
929 | mutex_unlock(&parser->job_lock); | 877 | mutex_unlock(&job->job_lock); |
878 | amdgpu_cs_parser_fini_late(parser); | ||
930 | up_read(&adev->exclusive_lock); | 879 | up_read(&adev->exclusive_lock); |
931 | return 0; | 880 | return 0; |
932 | } | 881 | } |
933 | r = amdgpu_cs_parser_prepare_job(parser); | ||
934 | if (r) | ||
935 | goto out; | ||
936 | 882 | ||
937 | cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; | 883 | cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; |
938 | out: | 884 | out: |
939 | amdgpu_cs_parser_fini(parser, r, true); | 885 | amdgpu_cs_parser_fini(parser, r, reserved_buffers); |
940 | up_read(&adev->exclusive_lock); | 886 | up_read(&adev->exclusive_lock); |
941 | r = amdgpu_cs_handle_lockup(adev, r); | 887 | r = amdgpu_cs_handle_lockup(adev, r); |
942 | return r; | 888 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 08bc7722ddb8..20cbc4eb5a6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -229,17 +229,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) | |||
229 | } | 229 | } |
230 | 230 | ||
231 | uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | 231 | uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, |
232 | struct fence *fence, uint64_t queued_seq) | 232 | struct fence *fence) |
233 | { | 233 | { |
234 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; | 234 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; |
235 | uint64_t seq = 0; | 235 | uint64_t seq = cring->sequence; |
236 | unsigned idx = 0; | 236 | unsigned idx = 0; |
237 | struct fence *other = NULL; | 237 | struct fence *other = NULL; |
238 | 238 | ||
239 | if (amdgpu_enable_scheduler) | ||
240 | seq = queued_seq; | ||
241 | else | ||
242 | seq = cring->sequence; | ||
243 | idx = seq % AMDGPU_CTX_MAX_CS_PENDING; | 239 | idx = seq % AMDGPU_CTX_MAX_CS_PENDING; |
244 | other = cring->fences[idx]; | 240 | other = cring->fences[idx]; |
245 | if (other) { | 241 | if (other) { |
@@ -253,8 +249,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | |||
253 | 249 | ||
254 | spin_lock(&ctx->ring_lock); | 250 | spin_lock(&ctx->ring_lock); |
255 | cring->fences[idx] = fence; | 251 | cring->fences[idx] = fence; |
256 | if (!amdgpu_enable_scheduler) | 252 | cring->sequence++; |
257 | cring->sequence++; | ||
258 | spin_unlock(&ctx->ring_lock); | 253 | spin_unlock(&ctx->ring_lock); |
259 | 254 | ||
260 | fence_put(other); | 255 | fence_put(other); |
@@ -267,21 +262,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, | |||
267 | { | 262 | { |
268 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; | 263 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; |
269 | struct fence *fence; | 264 | struct fence *fence; |
270 | uint64_t queued_seq; | ||
271 | 265 | ||
272 | spin_lock(&ctx->ring_lock); | 266 | spin_lock(&ctx->ring_lock); |
273 | if (amdgpu_enable_scheduler) | ||
274 | queued_seq = amd_sched_next_queued_seq(&cring->entity); | ||
275 | else | ||
276 | queued_seq = cring->sequence; | ||
277 | 267 | ||
278 | if (seq >= queued_seq) { | 268 | if (seq >= cring->sequence) { |
279 | spin_unlock(&ctx->ring_lock); | 269 | spin_unlock(&ctx->ring_lock); |
280 | return ERR_PTR(-EINVAL); | 270 | return ERR_PTR(-EINVAL); |
281 | } | 271 | } |
282 | 272 | ||
283 | 273 | ||
284 | if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) { | 274 | if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) { |
285 | spin_unlock(&ctx->ring_lock); | 275 | spin_unlock(&ctx->ring_lock); |
286 | return NULL; | 276 | return NULL; |
287 | } | 277 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e6fa27805207..0fcc0bd1622c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -49,9 +49,10 @@ | |||
49 | /* | 49 | /* |
50 | * KMS wrapper. | 50 | * KMS wrapper. |
51 | * - 3.0.0 - initial driver | 51 | * - 3.0.0 - initial driver |
52 | * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP) | ||
52 | */ | 53 | */ |
53 | #define KMS_DRIVER_MAJOR 3 | 54 | #define KMS_DRIVER_MAJOR 3 |
54 | #define KMS_DRIVER_MINOR 0 | 55 | #define KMS_DRIVER_MINOR 1 |
55 | #define KMS_DRIVER_PATCHLEVEL 0 | 56 | #define KMS_DRIVER_PATCHLEVEL 0 |
56 | 57 | ||
57 | int amdgpu_vram_limit = 0; | 58 | int amdgpu_vram_limit = 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 98500f1756f7..f446bf2fedc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -626,10 +626,10 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) | |||
626 | ring->fence_drv.ring = ring; | 626 | ring->fence_drv.ring = ring; |
627 | 627 | ||
628 | if (amdgpu_enable_scheduler) { | 628 | if (amdgpu_enable_scheduler) { |
629 | ring->scheduler = amd_sched_create((void *)ring->adev, | 629 | ring->scheduler = amd_sched_create(&amdgpu_sched_ops, |
630 | &amdgpu_sched_ops, | 630 | ring->idx, |
631 | ring->idx, 5, 0, | 631 | amdgpu_sched_hw_submission, |
632 | amdgpu_sched_hw_submission); | 632 | (void *)ring->adev); |
633 | if (!ring->scheduler) | 633 | if (!ring->scheduler) |
634 | DRM_ERROR("Failed to create scheduler on ring %d.\n", | 634 | DRM_ERROR("Failed to create scheduler on ring %d.\n", |
635 | ring->idx); | 635 | ring->idx); |
@@ -836,22 +836,37 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence) | |||
836 | return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); | 836 | return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); |
837 | } | 837 | } |
838 | 838 | ||
839 | static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences) | 839 | static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count) |
840 | { | 840 | { |
841 | int idx; | 841 | int idx; |
842 | struct amdgpu_fence *fence; | 842 | struct fence *fence; |
843 | 843 | ||
844 | idx = 0; | 844 | for (idx = 0; idx < count; ++idx) { |
845 | for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { | ||
846 | fence = fences[idx]; | 845 | fence = fences[idx]; |
847 | if (fence) { | 846 | if (fence) { |
848 | if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) | 847 | if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
849 | return true; | 848 | return true; |
850 | } | 849 | } |
851 | } | 850 | } |
852 | return false; | 851 | return false; |
853 | } | 852 | } |
854 | 853 | ||
854 | static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count) | ||
855 | { | ||
856 | int idx; | ||
857 | struct fence *fence; | ||
858 | |||
859 | for (idx = 0; idx < count; ++idx) { | ||
860 | fence = fences[idx]; | ||
861 | if (fence) { | ||
862 | if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) | ||
863 | return false; | ||
864 | } | ||
865 | } | ||
866 | |||
867 | return true; | ||
868 | } | ||
869 | |||
855 | struct amdgpu_wait_cb { | 870 | struct amdgpu_wait_cb { |
856 | struct fence_cb base; | 871 | struct fence_cb base; |
857 | struct task_struct *task; | 872 | struct task_struct *task; |
@@ -867,33 +882,56 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb) | |||
867 | static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, | 882 | static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, |
868 | signed long t) | 883 | signed long t) |
869 | { | 884 | { |
870 | struct amdgpu_fence *array[AMDGPU_MAX_RINGS]; | ||
871 | struct amdgpu_fence *fence = to_amdgpu_fence(f); | 885 | struct amdgpu_fence *fence = to_amdgpu_fence(f); |
872 | struct amdgpu_device *adev = fence->ring->adev; | 886 | struct amdgpu_device *adev = fence->ring->adev; |
873 | 887 | ||
874 | memset(&array[0], 0, sizeof(array)); | 888 | return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t); |
875 | array[0] = fence; | ||
876 | |||
877 | return amdgpu_fence_wait_any(adev, array, intr, t); | ||
878 | } | 889 | } |
879 | 890 | ||
880 | /* wait until any fence in array signaled */ | 891 | /** |
881 | signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, | 892 | * Wait the fence array with timeout |
882 | struct amdgpu_fence **array, bool intr, signed long t) | 893 | * |
894 | * @adev: amdgpu device | ||
895 | * @array: the fence array with amdgpu fence pointer | ||
896 | * @count: the number of the fence array | ||
897 | * @wait_all: the flag of wait all(true) or wait any(false) | ||
898 | * @intr: when sleep, set the current task interruptable or not | ||
899 | * @t: timeout to wait | ||
900 | * | ||
901 | * If wait_all is true, it will return when all fences are signaled or timeout. | ||
902 | * If wait_all is false, it will return when any fence is signaled or timeout. | ||
903 | */ | ||
904 | signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, | ||
905 | struct fence **array, | ||
906 | uint32_t count, | ||
907 | bool wait_all, | ||
908 | bool intr, | ||
909 | signed long t) | ||
883 | { | 910 | { |
884 | long idx = 0; | 911 | long idx = 0; |
885 | struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS]; | 912 | struct amdgpu_wait_cb *cb; |
886 | struct amdgpu_fence *fence; | 913 | struct fence *fence; |
887 | 914 | ||
888 | BUG_ON(!array); | 915 | BUG_ON(!array); |
889 | 916 | ||
890 | for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { | 917 | cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL); |
918 | if (cb == NULL) { | ||
919 | t = -ENOMEM; | ||
920 | goto err_free_cb; | ||
921 | } | ||
922 | |||
923 | for (idx = 0; idx < count; ++idx) { | ||
891 | fence = array[idx]; | 924 | fence = array[idx]; |
892 | if (fence) { | 925 | if (fence) { |
893 | cb[idx].task = current; | 926 | cb[idx].task = current; |
894 | if (fence_add_callback(&fence->base, | 927 | if (fence_add_callback(fence, |
895 | &cb[idx].base, amdgpu_fence_wait_cb)) | 928 | &cb[idx].base, amdgpu_fence_wait_cb)) { |
896 | return t; /* return if fence is already signaled */ | 929 | /* The fence is already signaled */ |
930 | if (wait_all) | ||
931 | continue; | ||
932 | else | ||
933 | goto fence_rm_cb; | ||
934 | } | ||
897 | } | 935 | } |
898 | } | 936 | } |
899 | 937 | ||
@@ -907,7 +945,9 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, | |||
907 | * amdgpu_test_signaled_any must be called after | 945 | * amdgpu_test_signaled_any must be called after |
908 | * set_current_state to prevent a race with wake_up_process | 946 | * set_current_state to prevent a race with wake_up_process |
909 | */ | 947 | */ |
910 | if (amdgpu_test_signaled_any(array)) | 948 | if (!wait_all && amdgpu_test_signaled_any(array, count)) |
949 | break; | ||
950 | if (wait_all && amdgpu_test_signaled_all(array, count)) | ||
911 | break; | 951 | break; |
912 | 952 | ||
913 | if (adev->needs_reset) { | 953 | if (adev->needs_reset) { |
@@ -923,13 +963,16 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, | |||
923 | 963 | ||
924 | __set_current_state(TASK_RUNNING); | 964 | __set_current_state(TASK_RUNNING); |
925 | 965 | ||
926 | idx = 0; | 966 | fence_rm_cb: |
927 | for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { | 967 | for (idx = 0; idx < count; ++idx) { |
928 | fence = array[idx]; | 968 | fence = array[idx]; |
929 | if (fence) | 969 | if (fence && cb[idx].base.func) |
930 | fence_remove_callback(&fence->base, &cb[idx].base); | 970 | fence_remove_callback(fence, &cb[idx].base); |
931 | } | 971 | } |
932 | 972 | ||
973 | err_free_cb: | ||
974 | kfree(cb); | ||
975 | |||
933 | return t; | 976 | return t; |
934 | } | 977 | } |
935 | 978 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 5104e64e9ad8..c439735ee670 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, | |||
73 | 73 | ||
74 | if (!vm) | 74 | if (!vm) |
75 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); | 75 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); |
76 | else | ||
77 | ib->gpu_addr = 0; | ||
78 | |||
79 | } else { | ||
80 | ib->sa_bo = NULL; | ||
81 | ib->ptr = NULL; | ||
82 | ib->gpu_addr = 0; | ||
83 | } | 76 | } |
84 | 77 | ||
85 | amdgpu_sync_create(&ib->sync); | 78 | amdgpu_sync_create(&ib->sync); |
86 | 79 | ||
87 | ib->ring = ring; | 80 | ib->ring = ring; |
88 | ib->fence = NULL; | ||
89 | ib->user = NULL; | ||
90 | ib->vm = vm; | 81 | ib->vm = vm; |
91 | ib->ctx = NULL; | ||
92 | ib->gds_base = 0; | ||
93 | ib->gds_size = 0; | ||
94 | ib->gws_base = 0; | ||
95 | ib->gws_size = 0; | ||
96 | ib->oa_base = 0; | ||
97 | ib->oa_size = 0; | ||
98 | ib->flags = 0; | ||
99 | 82 | ||
100 | return 0; | 83 | return 0; |
101 | } | 84 | } |
@@ -110,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, | |||
110 | */ | 93 | */ |
111 | void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) | 94 | void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) |
112 | { | 95 | { |
113 | amdgpu_sync_free(adev, &ib->sync, ib->fence); | 96 | amdgpu_sync_free(adev, &ib->sync, &ib->fence->base); |
114 | amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); | 97 | amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); |
115 | amdgpu_fence_unref(&ib->fence); | 98 | amdgpu_fence_unref(&ib->fence); |
116 | } | 99 | } |
117 | 100 | ||
@@ -143,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
143 | struct amdgpu_ring *ring; | 126 | struct amdgpu_ring *ring; |
144 | struct amdgpu_ctx *ctx, *old_ctx; | 127 | struct amdgpu_ctx *ctx, *old_ctx; |
145 | struct amdgpu_vm *vm; | 128 | struct amdgpu_vm *vm; |
146 | uint64_t sequence; | ||
147 | unsigned i; | 129 | unsigned i; |
148 | int r = 0; | 130 | int r = 0; |
149 | 131 | ||
@@ -158,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
158 | dev_err(adev->dev, "couldn't schedule ib\n"); | 140 | dev_err(adev->dev, "couldn't schedule ib\n"); |
159 | return -EINVAL; | 141 | return -EINVAL; |
160 | } | 142 | } |
161 | 143 | r = amdgpu_sync_wait(&ibs->sync); | |
144 | if (r) { | ||
145 | dev_err(adev->dev, "IB sync failed (%d).\n", r); | ||
146 | return r; | ||
147 | } | ||
162 | r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); | 148 | r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); |
163 | if (r) { | 149 | if (r) { |
164 | dev_err(adev->dev, "scheduling IB failed (%d).\n", r); | 150 | dev_err(adev->dev, "scheduling IB failed (%d).\n", r); |
@@ -216,12 +202,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
216 | return r; | 202 | return r; |
217 | } | 203 | } |
218 | 204 | ||
219 | sequence = amdgpu_enable_scheduler ? ib->sequence : 0; | ||
220 | |||
221 | if (!amdgpu_enable_scheduler && ib->ctx) | 205 | if (!amdgpu_enable_scheduler && ib->ctx) |
222 | ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, | 206 | ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, |
223 | &ib->fence->base, | 207 | &ib->fence->base); |
224 | sequence); | ||
225 | 208 | ||
226 | /* wrap the last IB with fence */ | 209 | /* wrap the last IB with fence */ |
227 | if (ib->user) { | 210 | if (ib->user) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 90044b254404..5c8a803acedc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -98,18 +98,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, | |||
98 | /* add 8 bytes for the rptr/wptr shadows and | 98 | /* add 8 bytes for the rptr/wptr shadows and |
99 | * add them to the end of the ring allocation. | 99 | * add them to the end of the ring allocation. |
100 | */ | 100 | */ |
101 | adev->irq.ih.ring = kzalloc(adev->irq.ih.ring_size + 8, GFP_KERNEL); | 101 | adev->irq.ih.ring = pci_alloc_consistent(adev->pdev, |
102 | adev->irq.ih.ring_size + 8, | ||
103 | &adev->irq.ih.rb_dma_addr); | ||
102 | if (adev->irq.ih.ring == NULL) | 104 | if (adev->irq.ih.ring == NULL) |
103 | return -ENOMEM; | 105 | return -ENOMEM; |
104 | adev->irq.ih.rb_dma_addr = pci_map_single(adev->pdev, | 106 | memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8); |
105 | (void *)adev->irq.ih.ring, | ||
106 | adev->irq.ih.ring_size, | ||
107 | PCI_DMA_BIDIRECTIONAL); | ||
108 | if (pci_dma_mapping_error(adev->pdev, adev->irq.ih.rb_dma_addr)) { | ||
109 | dev_err(&adev->pdev->dev, "Failed to DMA MAP the IH RB page\n"); | ||
110 | kfree((void *)adev->irq.ih.ring); | ||
111 | return -ENOMEM; | ||
112 | } | ||
113 | adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; | 107 | adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; |
114 | adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; | 108 | adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; |
115 | } | 109 | } |
@@ -149,9 +143,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev) | |||
149 | /* add 8 bytes for the rptr/wptr shadows and | 143 | /* add 8 bytes for the rptr/wptr shadows and |
150 | * add them to the end of the ring allocation. | 144 | * add them to the end of the ring allocation. |
151 | */ | 145 | */ |
152 | pci_unmap_single(adev->pdev, adev->irq.ih.rb_dma_addr, | 146 | pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8, |
153 | adev->irq.ih.ring_size + 8, PCI_DMA_BIDIRECTIONAL); | 147 | (void *)adev->irq.ih.ring, |
154 | kfree((void *)adev->irq.ih.ring); | 148 | adev->irq.ih.rb_dma_addr); |
155 | adev->irq.ih.ring = NULL; | 149 | adev->irq.ih.ring = NULL; |
156 | } | 150 | } |
157 | } else { | 151 | } else { |
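The amdgpu_ih.c hunks above switch the IH ring buffer from a kzalloc'd buffer that was streaming-mapped with `pci_map_single()` to a single coherent allocation. A minimal sketch of the alloc/free pairing, assuming the `ring`, `ring_size` and `rb_dma_addr` fields shown above; the `example_*` helpers are illustrative only:

```c
/* Coherent allocation: CPU and device share one buffer, so no explicit
 * map/unmap or cache sync is needed while the ring is in use. */
static int example_ih_ring_alloc(struct amdgpu_device *adev)
{
	adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
						 adev->irq.ih.ring_size + 8,
						 &adev->irq.ih.rb_dma_addr);
	if (adev->irq.ih.ring == NULL)
		return -ENOMEM;
	/* extra 8 bytes hold the rptr/wptr shadows at the end of the ring */
	memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
	return 0;
}

static void example_ih_ring_free(struct amdgpu_device *adev)
{
	/* Must be released with the matching coherent API, not kfree(). */
	pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
			    (void *)adev->irq.ih.ring,
			    adev->irq.ih.rb_dma_addr);
	adev->irq.ih.ring = NULL;
}
```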
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 87da6b1848fd..22367939ebf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -560,6 +560,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, | |||
560 | if (!fpriv) | 560 | if (!fpriv) |
561 | return; | 561 | return; |
562 | 562 | ||
563 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); | ||
564 | |||
563 | amdgpu_vm_fini(adev, &fpriv->vm); | 565 | amdgpu_vm_fini(adev, &fpriv->vm); |
564 | 566 | ||
565 | idr_for_each_entry(&fpriv->bo_list_handles, list, handle) | 567 | idr_for_each_entry(&fpriv->bo_list_handles, list, handle) |
@@ -568,8 +570,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, | |||
568 | idr_destroy(&fpriv->bo_list_handles); | 570 | idr_destroy(&fpriv->bo_list_handles); |
569 | mutex_destroy(&fpriv->bo_list_lock); | 571 | mutex_destroy(&fpriv->bo_list_lock); |
570 | 572 | ||
571 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); | ||
572 | |||
573 | kfree(fpriv); | 573 | kfree(fpriv); |
574 | file_priv->driver_priv = NULL; | 574 | file_priv->driver_priv = NULL; |
575 | } | 575 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 238465a9ac55..6ea18dcec561 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -193,7 +193,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, | |||
193 | unsigned size, unsigned align); | 193 | unsigned size, unsigned align); |
194 | void amdgpu_sa_bo_free(struct amdgpu_device *adev, | 194 | void amdgpu_sa_bo_free(struct amdgpu_device *adev, |
195 | struct amdgpu_sa_bo **sa_bo, | 195 | struct amdgpu_sa_bo **sa_bo, |
196 | struct amdgpu_fence *fence); | 196 | struct fence *fence); |
197 | #if defined(CONFIG_DEBUG_FS) | 197 | #if defined(CONFIG_DEBUG_FS) |
198 | void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, | 198 | void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, |
199 | struct seq_file *m); | 199 | struct seq_file *m); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index d6398cf45f24..b92525329d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -139,6 +139,20 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, | |||
139 | return r; | 139 | return r; |
140 | } | 140 | } |
141 | 141 | ||
142 | static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f) | ||
143 | { | ||
144 | struct amdgpu_fence *a_fence; | ||
145 | struct amd_sched_fence *s_fence; | ||
146 | |||
147 | s_fence = to_amd_sched_fence(f); | ||
148 | if (s_fence) | ||
149 | return s_fence->scheduler->ring_id; | ||
150 | a_fence = to_amdgpu_fence(f); | ||
151 | if (a_fence) | ||
152 | return a_fence->ring->idx; | ||
153 | return 0; | ||
154 | } | ||
155 | |||
142 | static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) | 156 | static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) |
143 | { | 157 | { |
144 | struct amdgpu_sa_manager *sa_manager = sa_bo->manager; | 158 | struct amdgpu_sa_manager *sa_manager = sa_bo->manager; |
@@ -147,7 +161,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) | |||
147 | } | 161 | } |
148 | list_del_init(&sa_bo->olist); | 162 | list_del_init(&sa_bo->olist); |
149 | list_del_init(&sa_bo->flist); | 163 | list_del_init(&sa_bo->flist); |
150 | amdgpu_fence_unref(&sa_bo->fence); | 164 | fence_put(sa_bo->fence); |
151 | kfree(sa_bo); | 165 | kfree(sa_bo); |
152 | } | 166 | } |
153 | 167 | ||
@@ -161,7 +175,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager) | |||
161 | sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); | 175 | sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); |
162 | list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { | 176 | list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { |
163 | if (sa_bo->fence == NULL || | 177 | if (sa_bo->fence == NULL || |
164 | !fence_is_signaled(&sa_bo->fence->base)) { | 178 | !fence_is_signaled(sa_bo->fence)) { |
165 | return; | 179 | return; |
166 | } | 180 | } |
167 | amdgpu_sa_bo_remove_locked(sa_bo); | 181 | amdgpu_sa_bo_remove_locked(sa_bo); |
@@ -246,7 +260,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager, | |||
246 | } | 260 | } |
247 | 261 | ||
248 | static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, | 262 | static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, |
249 | struct amdgpu_fence **fences, | 263 | struct fence **fences, |
250 | unsigned *tries) | 264 | unsigned *tries) |
251 | { | 265 | { |
252 | struct amdgpu_sa_bo *best_bo = NULL; | 266 | struct amdgpu_sa_bo *best_bo = NULL; |
@@ -275,7 +289,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, | |||
275 | sa_bo = list_first_entry(&sa_manager->flist[i], | 289 | sa_bo = list_first_entry(&sa_manager->flist[i], |
276 | struct amdgpu_sa_bo, flist); | 290 | struct amdgpu_sa_bo, flist); |
277 | 291 | ||
278 | if (!fence_is_signaled(&sa_bo->fence->base)) { | 292 | if (!fence_is_signaled(sa_bo->fence)) { |
279 | fences[i] = sa_bo->fence; | 293 | fences[i] = sa_bo->fence; |
280 | continue; | 294 | continue; |
281 | } | 295 | } |
@@ -299,7 +313,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, | |||
299 | } | 313 | } |
300 | 314 | ||
301 | if (best_bo) { | 315 | if (best_bo) { |
302 | ++tries[best_bo->fence->ring->idx]; | 316 | uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence); |
317 | ++tries[idx]; | ||
303 | sa_manager->hole = best_bo->olist.prev; | 318 | sa_manager->hole = best_bo->olist.prev; |
304 | 319 | ||
305 | /* we knew that this one is signaled, | 320 | /* we knew that this one is signaled, |
@@ -315,7 +330,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, | |||
315 | struct amdgpu_sa_bo **sa_bo, | 330 | struct amdgpu_sa_bo **sa_bo, |
316 | unsigned size, unsigned align) | 331 | unsigned size, unsigned align) |
317 | { | 332 | { |
318 | struct amdgpu_fence *fences[AMDGPU_MAX_RINGS]; | 333 | struct fence *fences[AMDGPU_MAX_RINGS]; |
319 | unsigned tries[AMDGPU_MAX_RINGS]; | 334 | unsigned tries[AMDGPU_MAX_RINGS]; |
320 | int i, r; | 335 | int i, r; |
321 | signed long t; | 336 | signed long t; |
@@ -352,7 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, | |||
352 | } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); | 367 | } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); |
353 | 368 | ||
354 | spin_unlock(&sa_manager->wq.lock); | 369 | spin_unlock(&sa_manager->wq.lock); |
355 | t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT); | 370 | t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false, |
371 | MAX_SCHEDULE_TIMEOUT); | ||
356 | r = (t > 0) ? 0 : t; | 372 | r = (t > 0) ? 0 : t; |
357 | spin_lock(&sa_manager->wq.lock); | 373 | spin_lock(&sa_manager->wq.lock); |
358 | /* if we have nothing to wait for block */ | 374 | /* if we have nothing to wait for block */ |
@@ -372,7 +388,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, | |||
372 | } | 388 | } |
373 | 389 | ||
374 | void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, | 390 | void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, |
375 | struct amdgpu_fence *fence) | 391 | struct fence *fence) |
376 | { | 392 | { |
377 | struct amdgpu_sa_manager *sa_manager; | 393 | struct amdgpu_sa_manager *sa_manager; |
378 | 394 | ||
@@ -382,10 +398,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, | |||
382 | 398 | ||
383 | sa_manager = (*sa_bo)->manager; | 399 | sa_manager = (*sa_bo)->manager; |
384 | spin_lock(&sa_manager->wq.lock); | 400 | spin_lock(&sa_manager->wq.lock); |
385 | if (fence && !fence_is_signaled(&fence->base)) { | 401 | if (fence && !fence_is_signaled(fence)) { |
386 | (*sa_bo)->fence = amdgpu_fence_ref(fence); | 402 | uint32_t idx; |
387 | list_add_tail(&(*sa_bo)->flist, | 403 | (*sa_bo)->fence = fence_get(fence); |
388 | &sa_manager->flist[fence->ring->idx]); | 404 | idx = amdgpu_sa_get_ring_from_fence(fence); |
405 | list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]); | ||
389 | } else { | 406 | } else { |
390 | amdgpu_sa_bo_remove_locked(*sa_bo); | 407 | amdgpu_sa_bo_remove_locked(*sa_bo); |
391 | } | 408 | } |
@@ -412,8 +429,16 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, | |||
412 | seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", | 429 | seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", |
413 | soffset, eoffset, eoffset - soffset); | 430 | soffset, eoffset, eoffset - soffset); |
414 | if (i->fence) { | 431 | if (i->fence) { |
415 | seq_printf(m, " protected by 0x%016llx on ring %d", | 432 | struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence); |
416 | i->fence->seq, i->fence->ring->idx); | 433 | struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence); |
434 | if (a_fence) | ||
435 | seq_printf(m, " protected by 0x%016llx on ring %d", | ||
436 | a_fence->seq, a_fence->ring->idx); | ||
437 | if (s_fence) | ||
438 | seq_printf(m, " protected by 0x%016x on ring %d", | ||
439 | s_fence->base.seqno, | ||
440 | s_fence->scheduler->ring_id); | ||
441 | |||
417 | } | 442 | } |
418 | seq_printf(m, "\n"); | 443 | seq_printf(m, "\n"); |
419 | } | 444 | } |
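The sub-allocator hunks above switch to plain struct fence pointers and lean on a helper, amdgpu_sa_get_ring_from_fence(), that is referenced but not shown in this excerpt. A minimal sketch of what such a helper could look like, reusing the a_fence/s_fence discrimination pattern from the debugfs dump above (an illustrative reconstruction, not necessarily the committed implementation):

    static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
    {
            struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
            struct amdgpu_fence *a_fence;

            /* scheduler fences carry the ring id on their scheduler */
            if (s_fence)
                    return s_fence->scheduler->ring_id;

            /* hardware fences know their ring directly */
            a_fence = to_amdgpu_fence(f);
            if (a_fence)
                    return a_fence->ring->idx;

            return 0;
    }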
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index a86e38158afa..f93fb3541488 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | |||
@@ -27,55 +27,28 @@ | |||
27 | #include <drm/drmP.h> | 27 | #include <drm/drmP.h> |
28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
29 | 29 | ||
30 | static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, | 30 | static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job) |
31 | struct amd_sched_entity *entity, | ||
32 | struct amd_sched_job *job) | ||
33 | { | 31 | { |
34 | int r = 0; | 32 | struct amdgpu_job *sched_job; |
35 | struct amdgpu_cs_parser *sched_job; | ||
36 | if (!job || !job->data) { | ||
37 | DRM_ERROR("job is null\n"); | ||
38 | return -EINVAL; | ||
39 | } | ||
40 | |||
41 | sched_job = (struct amdgpu_cs_parser *)job->data; | ||
42 | if (sched_job->prepare_job) { | ||
43 | r = sched_job->prepare_job(sched_job); | ||
44 | if (r) { | ||
45 | DRM_ERROR("Prepare job error\n"); | ||
46 | schedule_work(&sched_job->job_work); | ||
47 | } | ||
48 | } | ||
49 | return r; | ||
50 | } | ||
51 | |||
52 | static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, | ||
53 | struct amd_sched_entity *entity, | ||
54 | struct amd_sched_job *job) | ||
55 | { | ||
56 | int r = 0; | ||
57 | struct amdgpu_cs_parser *sched_job; | ||
58 | struct amdgpu_fence *fence; | 33 | struct amdgpu_fence *fence; |
34 | int r; | ||
59 | 35 | ||
60 | if (!job || !job->data) { | 36 | if (!job) { |
61 | DRM_ERROR("job is null\n"); | 37 | DRM_ERROR("job is null\n"); |
62 | return NULL; | 38 | return NULL; |
63 | } | 39 | } |
64 | sched_job = (struct amdgpu_cs_parser *)job->data; | 40 | sched_job = (struct amdgpu_job *)job; |
65 | mutex_lock(&sched_job->job_lock); | 41 | mutex_lock(&sched_job->job_lock); |
66 | r = amdgpu_ib_schedule(sched_job->adev, | 42 | r = amdgpu_ib_schedule(sched_job->adev, |
67 | sched_job->num_ibs, | 43 | sched_job->num_ibs, |
68 | sched_job->ibs, | 44 | sched_job->ibs, |
69 | sched_job->filp); | 45 | sched_job->base.owner); |
70 | if (r) | 46 | if (r) |
71 | goto err; | 47 | goto err; |
72 | fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); | 48 | fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); |
73 | 49 | ||
74 | if (sched_job->run_job) { | 50 | if (sched_job->free_job) |
75 | r = sched_job->run_job(sched_job); | 51 | sched_job->free_job(sched_job); |
76 | if (r) | ||
77 | goto err; | ||
78 | } | ||
79 | 52 | ||
80 | mutex_unlock(&sched_job->job_lock); | 53 | mutex_unlock(&sched_job->job_lock); |
81 | return &fence->base; | 54 | return &fence->base; |
@@ -83,25 +56,25 @@ static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, | |||
83 | err: | 56 | err: |
84 | DRM_ERROR("Run job error\n"); | 57 | DRM_ERROR("Run job error\n"); |
85 | mutex_unlock(&sched_job->job_lock); | 58 | mutex_unlock(&sched_job->job_lock); |
86 | schedule_work(&sched_job->job_work); | 59 | job->sched->ops->process_job(job); |
87 | return NULL; | 60 | return NULL; |
88 | } | 61 | } |
89 | 62 | ||
90 | static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, | 63 | static void amdgpu_sched_process_job(struct amd_sched_job *job) |
91 | struct amd_sched_job *job) | ||
92 | { | 64 | { |
93 | struct amdgpu_cs_parser *sched_job; | 65 | struct amdgpu_job *sched_job; |
94 | 66 | ||
95 | if (!job || !job->data) { | 67 | if (!job) { |
96 | DRM_ERROR("job is null\n"); | 68 | DRM_ERROR("job is null\n"); |
97 | return; | 69 | return; |
98 | } | 70 | } |
99 | sched_job = (struct amdgpu_cs_parser *)job->data; | 71 | sched_job = (struct amdgpu_job *)job; |
100 | schedule_work(&sched_job->job_work); | 72 | /* after processing job, free memory */ |
73 | fence_put(&sched_job->base.s_fence->base); | ||
74 | kfree(sched_job); | ||
101 | } | 75 | } |
102 | 76 | ||
103 | struct amd_sched_backend_ops amdgpu_sched_ops = { | 77 | struct amd_sched_backend_ops amdgpu_sched_ops = { |
104 | .prepare_job = amdgpu_sched_prepare_job, | ||
105 | .run_job = amdgpu_sched_run_job, | 78 | .run_job = amdgpu_sched_run_job, |
106 | .process_job = amdgpu_sched_process_job | 79 | .process_job = amdgpu_sched_process_job |
107 | }; | 80 | }; |
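With prepare_job removed, a scheduler backend now only supplies run_job and process_job. A hypothetical minimal backend wired up the same way as amdgpu_sched_ops above (the my_* names and my_hw_submit() are placeholders for illustration, not amdgpu functions):

    /* run_job: push the job to the hardware and hand back the hw fence */
    static struct fence *my_run_job(struct amd_sched_job *job)
    {
            return my_hw_submit(job); /* placeholder for the actual submission */
    }

    /* process_job: called once the hw fence has signaled; drop the scheduler
     * fence reference taken at push time and free the job */
    static void my_process_job(struct amd_sched_job *job)
    {
            fence_put(&job->s_fence->base);
            kfree(job);
    }

    static struct amd_sched_backend_ops my_sched_ops = {
            .run_job = my_run_job,
            .process_job = my_process_job,
    };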
@@ -110,36 +83,39 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, | |||
110 | struct amdgpu_ring *ring, | 83 | struct amdgpu_ring *ring, |
111 | struct amdgpu_ib *ibs, | 84 | struct amdgpu_ib *ibs, |
112 | unsigned num_ibs, | 85 | unsigned num_ibs, |
113 | int (*free_job)(struct amdgpu_cs_parser *), | 86 | int (*free_job)(struct amdgpu_job *), |
114 | void *owner, | 87 | void *owner, |
115 | struct fence **f) | 88 | struct fence **f) |
116 | { | 89 | { |
117 | int r = 0; | 90 | int r = 0; |
118 | if (amdgpu_enable_scheduler) { | 91 | if (amdgpu_enable_scheduler) { |
119 | struct amdgpu_cs_parser *sched_job = | 92 | struct amdgpu_job *job = |
120 | amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, | 93 | kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); |
121 | ibs, num_ibs); | 94 | if (!job) |
122 | if(!sched_job) { | ||
123 | return -ENOMEM; | 95 | return -ENOMEM; |
124 | } | 96 | job->base.sched = ring->scheduler; |
125 | sched_job->free_job = free_job; | 97 | job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; |
126 | mutex_lock(&sched_job->job_lock); | 98 | job->adev = adev; |
127 | r = amd_sched_push_job(ring->scheduler, | 99 | job->ibs = ibs; |
128 | &adev->kernel_ctx.rings[ring->idx].entity, | 100 | job->num_ibs = num_ibs; |
129 | sched_job, &sched_job->s_fence); | 101 | job->base.owner = owner; |
102 | mutex_init(&job->job_lock); | ||
103 | job->free_job = free_job; | ||
104 | mutex_lock(&job->job_lock); | ||
105 | r = amd_sched_entity_push_job((struct amd_sched_job *)job); | ||
130 | if (r) { | 106 | if (r) { |
131 | mutex_unlock(&sched_job->job_lock); | 107 | mutex_unlock(&job->job_lock); |
132 | kfree(sched_job); | 108 | kfree(job); |
133 | return r; | 109 | return r; |
134 | } | 110 | } |
135 | ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; | 111 | *f = fence_get(&job->base.s_fence->base); |
136 | *f = fence_get(&sched_job->s_fence->base); | 112 | mutex_unlock(&job->job_lock); |
137 | mutex_unlock(&sched_job->job_lock); | ||
138 | } else { | 113 | } else { |
139 | r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); | 114 | r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); |
140 | if (r) | 115 | if (r) |
141 | return r; | 116 | return r; |
142 | *f = fence_get(&ibs[num_ibs - 1].fence->base); | 117 | *f = fence_get(&ibs[num_ibs - 1].fence->base); |
143 | } | 118 | } |
119 | |||
144 | return 0; | 120 | return 0; |
145 | } | 121 | } |
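amdgpu_job now carries the IBs, owner and free_job callback, so kernel-internal submissions funnel through amdgpu_sched_ib_submit_kernel_helper() whether or not the scheduler is enabled. A hedged usage sketch, modelled on the amdgpu_copy_buffer() caller later in this series (error handling trimmed, surrounding variable names assumed):

    struct fence *f = NULL;
    int r;

    /* one IB, freed by amdgpu_vm_free_job() once it has run */
    r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
                                             &amdgpu_vm_free_job,
                                             AMDGPU_FENCE_OWNER_UNDEFINED,
                                             &f);
    if (r)
            return r;

    /* the helper returns a generic fence; wait on it and drop the reference */
    r = fence_wait(f, false);
    fence_put(f);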
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c index d6d41a42ab65..ff3ca52ec6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c | |||
@@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, | |||
87 | 87 | ||
88 | void amdgpu_semaphore_free(struct amdgpu_device *adev, | 88 | void amdgpu_semaphore_free(struct amdgpu_device *adev, |
89 | struct amdgpu_semaphore **semaphore, | 89 | struct amdgpu_semaphore **semaphore, |
90 | struct amdgpu_fence *fence) | 90 | struct fence *fence) |
91 | { | 91 | { |
92 | if (semaphore == NULL || *semaphore == NULL) { | 92 | if (semaphore == NULL || *semaphore == NULL) { |
93 | return; | 93 | return; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 7cb711fc1ee2..4fffb2539331 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | |||
@@ -32,6 +32,11 @@ | |||
32 | #include "amdgpu.h" | 32 | #include "amdgpu.h" |
33 | #include "amdgpu_trace.h" | 33 | #include "amdgpu_trace.h" |
34 | 34 | ||
35 | struct amdgpu_sync_entry { | ||
36 | struct hlist_node node; | ||
37 | struct fence *fence; | ||
38 | }; | ||
39 | |||
35 | /** | 40 | /** |
36 | * amdgpu_sync_create - zero init sync object | 41 | * amdgpu_sync_create - zero init sync object |
37 | * | 42 | * |
@@ -49,9 +54,33 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) | |||
49 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | 54 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) |
50 | sync->sync_to[i] = NULL; | 55 | sync->sync_to[i] = NULL; |
51 | 56 | ||
57 | hash_init(sync->fences); | ||
52 | sync->last_vm_update = NULL; | 58 | sync->last_vm_update = NULL; |
53 | } | 59 | } |
54 | 60 | ||
61 | static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) | ||
62 | { | ||
63 | struct amdgpu_fence *a_fence = to_amdgpu_fence(f); | ||
64 | struct amd_sched_fence *s_fence = to_amd_sched_fence(f); | ||
65 | |||
66 | if (a_fence) | ||
67 | return a_fence->ring->adev == adev; | ||
68 | if (s_fence) | ||
69 | return (struct amdgpu_device *)s_fence->scheduler->priv == adev; | ||
70 | return false; | ||
71 | } | ||
72 | |||
73 | static bool amdgpu_sync_test_owner(struct fence *f, void *owner) | ||
74 | { | ||
75 | struct amdgpu_fence *a_fence = to_amdgpu_fence(f); | ||
76 | struct amd_sched_fence *s_fence = to_amd_sched_fence(f); | ||
77 | if (s_fence) | ||
78 | return s_fence->owner == owner; | ||
79 | if (a_fence) | ||
80 | return a_fence->owner == owner; | ||
81 | return false; | ||
82 | } | ||
83 | |||
55 | /** | 84 | /** |
56 | * amdgpu_sync_fence - remember to sync to this fence | 85 | * amdgpu_sync_fence - remember to sync to this fence |
57 | * | 86 | * |
@@ -62,28 +91,54 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) | |||
62 | int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, | 91 | int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, |
63 | struct fence *f) | 92 | struct fence *f) |
64 | { | 93 | { |
94 | struct amdgpu_sync_entry *e; | ||
65 | struct amdgpu_fence *fence; | 95 | struct amdgpu_fence *fence; |
66 | struct amdgpu_fence *other; | 96 | struct amdgpu_fence *other; |
97 | struct fence *tmp, *later; | ||
67 | 98 | ||
68 | if (!f) | 99 | if (!f) |
69 | return 0; | 100 | return 0; |
70 | 101 | ||
102 | if (amdgpu_sync_same_dev(adev, f) && | ||
103 | amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) { | ||
104 | if (sync->last_vm_update) { | ||
105 | tmp = sync->last_vm_update; | ||
106 | BUG_ON(f->context != tmp->context); | ||
107 | later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp; | ||
108 | sync->last_vm_update = fence_get(later); | ||
109 | fence_put(tmp); | ||
110 | } else | ||
111 | sync->last_vm_update = fence_get(f); | ||
112 | } | ||
113 | |||
71 | fence = to_amdgpu_fence(f); | 114 | fence = to_amdgpu_fence(f); |
72 | if (!fence || fence->ring->adev != adev) | 115 | if (!fence || fence->ring->adev != adev) { |
73 | return fence_wait(f, true); | 116 | hash_for_each_possible(sync->fences, e, node, f->context) { |
117 | struct fence *new; | ||
118 | if (unlikely(e->fence->context != f->context)) | ||
119 | continue; | ||
120 | new = fence_get(fence_later(e->fence, f)); | ||
121 | if (new) { | ||
122 | fence_put(e->fence); | ||
123 | e->fence = new; | ||
124 | } | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); | ||
129 | if (!e) | ||
130 | return -ENOMEM; | ||
131 | |||
132 | hash_add(sync->fences, &e->node, f->context); | ||
133 | e->fence = fence_get(f); | ||
134 | return 0; | ||
135 | } | ||
74 | 136 | ||
75 | other = sync->sync_to[fence->ring->idx]; | 137 | other = sync->sync_to[fence->ring->idx]; |
76 | sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( | 138 | sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( |
77 | amdgpu_fence_later(fence, other)); | 139 | amdgpu_fence_later(fence, other)); |
78 | amdgpu_fence_unref(&other); | 140 | amdgpu_fence_unref(&other); |
79 | 141 | ||
80 | if (fence->owner == AMDGPU_FENCE_OWNER_VM) { | ||
81 | other = sync->last_vm_update; | ||
82 | sync->last_vm_update = amdgpu_fence_ref( | ||
83 | amdgpu_fence_later(fence, other)); | ||
84 | amdgpu_fence_unref(&other); | ||
85 | } | ||
86 | |||
87 | return 0; | 142 | return 0; |
88 | } | 143 | } |
89 | 144 | ||
@@ -147,6 +202,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, | |||
147 | return r; | 202 | return r; |
148 | } | 203 | } |
149 | 204 | ||
205 | int amdgpu_sync_wait(struct amdgpu_sync *sync) | ||
206 | { | ||
207 | struct amdgpu_sync_entry *e; | ||
208 | struct hlist_node *tmp; | ||
209 | int i, r; | ||
210 | |||
211 | hash_for_each_safe(sync->fences, i, tmp, e, node) { | ||
212 | r = fence_wait(e->fence, false); | ||
213 | if (r) | ||
214 | return r; | ||
215 | |||
216 | hash_del(&e->node); | ||
217 | fence_put(e->fence); | ||
218 | kfree(e); | ||
219 | } | ||
220 | return 0; | ||
221 | } | ||
222 | |||
150 | /** | 223 | /** |
151 | * amdgpu_sync_rings - sync ring to all registered fences | 224 | * amdgpu_sync_rings - sync ring to all registered fences |
152 | * | 225 | * |
@@ -234,15 +307,23 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, | |||
234 | */ | 307 | */ |
235 | void amdgpu_sync_free(struct amdgpu_device *adev, | 308 | void amdgpu_sync_free(struct amdgpu_device *adev, |
236 | struct amdgpu_sync *sync, | 309 | struct amdgpu_sync *sync, |
237 | struct amdgpu_fence *fence) | 310 | struct fence *fence) |
238 | { | 311 | { |
312 | struct amdgpu_sync_entry *e; | ||
313 | struct hlist_node *tmp; | ||
239 | unsigned i; | 314 | unsigned i; |
240 | 315 | ||
316 | hash_for_each_safe(sync->fences, i, tmp, e, node) { | ||
317 | hash_del(&e->node); | ||
318 | fence_put(e->fence); | ||
319 | kfree(e); | ||
320 | } | ||
321 | |||
241 | for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) | 322 | for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) |
242 | amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); | 323 | amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); |
243 | 324 | ||
244 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | 325 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) |
245 | amdgpu_fence_unref(&sync->sync_to[i]); | 326 | amdgpu_fence_unref(&sync->sync_to[i]); |
246 | 327 | ||
247 | amdgpu_fence_unref(&sync->last_vm_update); | 328 | fence_put(sync->last_vm_update); |
248 | } | 329 | } |
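The sync object now deduplicates foreign fences per fence context in a small hash instead of blocking inside amdgpu_sync_fence(), and amdgpu_sync_wait() performs the CPU-side wait on whatever was collected. A minimal calling sketch built only from the functions shown above (fence_a and fence_b are stand-ins for fences gathered by the caller):

    struct amdgpu_sync sync;
    int r;

    amdgpu_sync_create(&sync);

    /* only the later fence of each fence context is kept */
    r = amdgpu_sync_fence(adev, &sync, fence_a);
    if (!r)
            r = amdgpu_sync_fence(adev, &sync, fence_b);

    /* wait on everything that was collected */
    if (!r)
            r = amdgpu_sync_wait(&sync);

    amdgpu_sync_free(adev, &sync, NULL);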
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 962dd5552137..f80b1a43be8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | |||
@@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) | |||
77 | void *gtt_map, *vram_map; | 77 | void *gtt_map, *vram_map; |
78 | void **gtt_start, **gtt_end; | 78 | void **gtt_start, **gtt_end; |
79 | void **vram_start, **vram_end; | 79 | void **vram_start, **vram_end; |
80 | struct amdgpu_fence *fence = NULL; | 80 | struct fence *fence = NULL; |
81 | 81 | ||
82 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, | 82 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, |
83 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); | 83 | AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); |
@@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) | |||
116 | goto out_lclean_unpin; | 116 | goto out_lclean_unpin; |
117 | } | 117 | } |
118 | 118 | ||
119 | r = fence_wait(&fence->base, false); | 119 | r = fence_wait(fence, false); |
120 | if (r) { | 120 | if (r) { |
121 | DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); | 121 | DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); |
122 | goto out_lclean_unpin; | 122 | goto out_lclean_unpin; |
123 | } | 123 | } |
124 | 124 | ||
125 | amdgpu_fence_unref(&fence); | 125 | fence_put(fence); |
126 | 126 | ||
127 | r = amdgpu_bo_kmap(vram_obj, &vram_map); | 127 | r = amdgpu_bo_kmap(vram_obj, &vram_map); |
128 | if (r) { | 128 | if (r) { |
@@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) | |||
161 | goto out_lclean_unpin; | 161 | goto out_lclean_unpin; |
162 | } | 162 | } |
163 | 163 | ||
164 | r = fence_wait(&fence->base, false); | 164 | r = fence_wait(fence, false); |
165 | if (r) { | 165 | if (r) { |
166 | DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); | 166 | DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); |
167 | goto out_lclean_unpin; | 167 | goto out_lclean_unpin; |
168 | } | 168 | } |
169 | 169 | ||
170 | amdgpu_fence_unref(&fence); | 170 | fence_put(fence); |
171 | 171 | ||
172 | r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); | 172 | r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); |
173 | if (r) { | 173 | if (r) { |
@@ -214,7 +214,7 @@ out_lclean: | |||
214 | amdgpu_bo_unref(&gtt_obj[i]); | 214 | amdgpu_bo_unref(&gtt_obj[i]); |
215 | } | 215 | } |
216 | if (fence) | 216 | if (fence) |
217 | amdgpu_fence_unref(&fence); | 217 | fence_put(fence); |
218 | break; | 218 | break; |
219 | } | 219 | } |
220 | 220 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dd3415d2e45d..399143541d8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, | |||
228 | struct amdgpu_device *adev; | 228 | struct amdgpu_device *adev; |
229 | struct amdgpu_ring *ring; | 229 | struct amdgpu_ring *ring; |
230 | uint64_t old_start, new_start; | 230 | uint64_t old_start, new_start; |
231 | struct amdgpu_fence *fence; | 231 | struct fence *fence; |
232 | int r; | 232 | int r; |
233 | 233 | ||
234 | adev = amdgpu_get_adev(bo->bdev); | 234 | adev = amdgpu_get_adev(bo->bdev); |
@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, | |||
269 | new_mem->num_pages * PAGE_SIZE, /* bytes */ | 269 | new_mem->num_pages * PAGE_SIZE, /* bytes */ |
270 | bo->resv, &fence); | 270 | bo->resv, &fence); |
271 | /* FIXME: handle copy error */ | 271 | /* FIXME: handle copy error */ |
272 | r = ttm_bo_move_accel_cleanup(bo, &fence->base, | 272 | r = ttm_bo_move_accel_cleanup(bo, fence, |
273 | evict, no_wait_gpu, new_mem); | 273 | evict, no_wait_gpu, new_mem); |
274 | amdgpu_fence_unref(&fence); | 274 | fence_put(fence); |
275 | return r; | 275 | return r; |
276 | } | 276 | } |
277 | 277 | ||
@@ -987,46 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, | |||
987 | uint64_t dst_offset, | 987 | uint64_t dst_offset, |
988 | uint32_t byte_count, | 988 | uint32_t byte_count, |
989 | struct reservation_object *resv, | 989 | struct reservation_object *resv, |
990 | struct amdgpu_fence **fence) | 990 | struct fence **fence) |
991 | { | 991 | { |
992 | struct amdgpu_device *adev = ring->adev; | 992 | struct amdgpu_device *adev = ring->adev; |
993 | struct amdgpu_sync sync; | ||
994 | uint32_t max_bytes; | 993 | uint32_t max_bytes; |
995 | unsigned num_loops, num_dw; | 994 | unsigned num_loops, num_dw; |
995 | struct amdgpu_ib *ib; | ||
996 | unsigned i; | 996 | unsigned i; |
997 | int r; | 997 | int r; |
998 | 998 | ||
999 | /* sync other rings */ | ||
1000 | amdgpu_sync_create(&sync); | ||
1001 | if (resv) { | ||
1002 | r = amdgpu_sync_resv(adev, &sync, resv, false); | ||
1003 | if (r) { | ||
1004 | DRM_ERROR("sync failed (%d).\n", r); | ||
1005 | amdgpu_sync_free(adev, &sync, NULL); | ||
1006 | return r; | ||
1007 | } | ||
1008 | } | ||
1009 | |||
1010 | max_bytes = adev->mman.buffer_funcs->copy_max_bytes; | 999 | max_bytes = adev->mman.buffer_funcs->copy_max_bytes; |
1011 | num_loops = DIV_ROUND_UP(byte_count, max_bytes); | 1000 | num_loops = DIV_ROUND_UP(byte_count, max_bytes); |
1012 | num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; | 1001 | num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; |
1013 | 1002 | ||
1014 | /* for fence and sync */ | 1003 | /* for IB padding */ |
1015 | num_dw += 64 + AMDGPU_NUM_SYNCS * 8; | 1004 | while (num_dw & 0x7) |
1005 | num_dw++; | ||
1006 | |||
1007 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | ||
1008 | if (!ib) | ||
1009 | return -ENOMEM; | ||
1016 | 1010 | ||
1017 | r = amdgpu_ring_lock(ring, num_dw); | 1011 | r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib); |
1018 | if (r) { | 1012 | if (r) { |
1019 | DRM_ERROR("ring lock failed (%d).\n", r); | 1013 | kfree(ib); |
1020 | amdgpu_sync_free(adev, &sync, NULL); | ||
1021 | return r; | 1014 | return r; |
1022 | } | 1015 | } |
1023 | 1016 | ||
1024 | amdgpu_sync_rings(&sync, ring); | 1017 | ib->length_dw = 0; |
1018 | |||
1019 | if (resv) { | ||
1020 | r = amdgpu_sync_resv(adev, &ib->sync, resv, | ||
1021 | AMDGPU_FENCE_OWNER_UNDEFINED); | ||
1022 | if (r) { | ||
1023 | DRM_ERROR("sync failed (%d).\n", r); | ||
1024 | goto error_free; | ||
1025 | } | ||
1026 | } | ||
1025 | 1027 | ||
1026 | for (i = 0; i < num_loops; i++) { | 1028 | for (i = 0; i < num_loops; i++) { |
1027 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); | 1029 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); |
1028 | 1030 | ||
1029 | amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset, | 1031 | amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, |
1030 | cur_size_in_bytes); | 1032 | cur_size_in_bytes); |
1031 | 1033 | ||
1032 | src_offset += cur_size_in_bytes; | 1034 | src_offset += cur_size_in_bytes; |
@@ -1034,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, | |||
1034 | byte_count -= cur_size_in_bytes; | 1036 | byte_count -= cur_size_in_bytes; |
1035 | } | 1037 | } |
1036 | 1038 | ||
1037 | r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence); | 1039 | amdgpu_vm_pad_ib(adev, ib); |
1038 | if (r) { | 1040 | WARN_ON(ib->length_dw > num_dw); |
1039 | amdgpu_ring_unlock_undo(ring); | 1041 | r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, |
1040 | amdgpu_sync_free(adev, &sync, NULL); | 1042 | &amdgpu_vm_free_job, |
1041 | return r; | 1043 | AMDGPU_FENCE_OWNER_MOVE, |
1042 | } | 1044 | fence); |
1043 | 1045 | if (r) | |
1044 | amdgpu_ring_unlock_commit(ring); | 1046 | goto error_free; |
1045 | amdgpu_sync_free(adev, &sync, *fence); | ||
1046 | 1047 | ||
1048 | if (!amdgpu_enable_scheduler) { | ||
1049 | amdgpu_ib_free(adev, ib); | ||
1050 | kfree(ib); | ||
1051 | } | ||
1047 | return 0; | 1052 | return 0; |
1053 | error_free: | ||
1054 | amdgpu_ib_free(adev, ib); | ||
1055 | kfree(ib); | ||
1056 | return r; | ||
1048 | } | 1057 | } |
1049 | 1058 | ||
1050 | #if defined(CONFIG_DEBUG_FS) | 1059 | #if defined(CONFIG_DEBUG_FS) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 68369cf1e318..b87355ccfb1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | |||
@@ -807,7 +807,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) | |||
807 | } | 807 | } |
808 | 808 | ||
809 | static int amdgpu_uvd_free_job( | 809 | static int amdgpu_uvd_free_job( |
810 | struct amdgpu_cs_parser *sched_job) | 810 | struct amdgpu_job *sched_job) |
811 | { | 811 | { |
812 | amdgpu_ib_free(sched_job->adev, sched_job->ibs); | 812 | amdgpu_ib_free(sched_job->adev, sched_job->ibs); |
813 | kfree(sched_job->ibs); | 813 | kfree(sched_job->ibs); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 33ee6ae28f37..1a984c934b1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | |||
@@ -340,7 +340,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) | |||
340 | } | 340 | } |
341 | 341 | ||
342 | static int amdgpu_vce_free_job( | 342 | static int amdgpu_vce_free_job( |
343 | struct amdgpu_cs_parser *sched_job) | 343 | struct amdgpu_job *sched_job) |
344 | { | 344 | { |
345 | amdgpu_ib_free(sched_job->adev, sched_job->ibs); | 345 | amdgpu_ib_free(sched_job->adev, sched_job->ibs); |
346 | kfree(sched_job->ibs); | 346 | kfree(sched_job->ibs); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a78a206e176e..83b7ce6f5f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -200,19 +200,29 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | |||
200 | */ | 200 | */ |
201 | void amdgpu_vm_flush(struct amdgpu_ring *ring, | 201 | void amdgpu_vm_flush(struct amdgpu_ring *ring, |
202 | struct amdgpu_vm *vm, | 202 | struct amdgpu_vm *vm, |
203 | struct amdgpu_fence *updates) | 203 | struct fence *updates) |
204 | { | 204 | { |
205 | uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); | 205 | uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); |
206 | struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; | 206 | struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; |
207 | struct amdgpu_fence *flushed_updates = vm_id->flushed_updates; | 207 | struct fence *flushed_updates = vm_id->flushed_updates; |
208 | bool is_earlier = false; | ||
209 | |||
210 | if (flushed_updates && updates) { | ||
211 | BUG_ON(flushed_updates->context != updates->context); | ||
212 | is_earlier = (updates->seqno - flushed_updates->seqno <= | ||
213 | INT_MAX) ? true : false; | ||
214 | } | ||
208 | 215 | ||
209 | if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || | 216 | if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || |
210 | (updates && amdgpu_fence_is_earlier(flushed_updates, updates))) { | 217 | is_earlier) { |
211 | 218 | ||
212 | trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); | 219 | trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); |
213 | vm_id->flushed_updates = amdgpu_fence_ref( | 220 | if (is_earlier) { |
214 | amdgpu_fence_later(flushed_updates, updates)); | 221 | vm_id->flushed_updates = fence_get(updates); |
215 | amdgpu_fence_unref(&flushed_updates); | 222 | fence_put(flushed_updates); |
223 | } | ||
224 | if (!flushed_updates) | ||
225 | vm_id->flushed_updates = fence_get(updates); | ||
216 | vm_id->pd_gpu_addr = pd_addr; | 226 | vm_id->pd_gpu_addr = pd_addr; |
217 | amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); | 227 | amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); |
218 | } | 228 | } |
@@ -306,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, | |||
306 | } | 316 | } |
307 | } | 317 | } |
308 | 318 | ||
309 | static int amdgpu_vm_free_job( | 319 | int amdgpu_vm_free_job(struct amdgpu_job *sched_job) |
310 | struct amdgpu_cs_parser *sched_job) | ||
311 | { | 320 | { |
312 | int i; | 321 | int i; |
313 | for (i = 0; i < sched_job->num_ibs; i++) | 322 | for (i = 0; i < sched_job->num_ibs; i++) |
@@ -1347,7 +1356,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1347 | fence_put(vm->page_directory_fence); | 1356 | fence_put(vm->page_directory_fence); |
1348 | 1357 | ||
1349 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 1358 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
1350 | amdgpu_fence_unref(&vm->ids[i].flushed_updates); | 1359 | fence_put(vm->ids[i].flushed_updates); |
1351 | amdgpu_fence_unref(&vm->ids[i].last_id_use); | 1360 | amdgpu_fence_unref(&vm->ids[i].last_id_use); |
1352 | } | 1361 | } |
1353 | 1362 | ||
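Both the sync and VM flush hunks above replace amdgpu_fence_is_earlier()/amdgpu_fence_later() with an open-coded test on the generic fence seqno: two fences from the same context compare as unsigned 32-bit sequence numbers, and the subtraction-against-INT_MAX form stays correct across wrap-around. A small helper sketch making that test explicit (illustrative only, not a helper added by the patch):

    /* wrap-safe "a is at least as new as b" for seqnos of one fence context;
     * unsigned subtraction keeps the comparison valid after the counter wraps */
    static bool seqno_is_later_or_equal(unsigned a, unsigned b)
    {
            return (a - b) <= INT_MAX;
    }

    /* usage mirroring amdgpu_vm_flush() above: true when the previously
     * flushed update is older than the new one */
    bool is_earlier = flushed_updates && updates &&
                      seqno_is_later_or_equal(updates->seqno,
                                              flushed_updates->seqno);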
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 2b4242b39b0a..3920c1e346f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c | |||
@@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) | |||
630 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 630 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
631 | tmp = 0xCAFEDEAD; | 631 | tmp = 0xCAFEDEAD; |
632 | adev->wb.wb[index] = cpu_to_le32(tmp); | 632 | adev->wb.wb[index] = cpu_to_le32(tmp); |
633 | memset(&ib, 0, sizeof(ib)); | ||
633 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 634 | r = amdgpu_ib_get(ring, NULL, 256, &ib); |
634 | if (r) { | 635 | if (r) { |
635 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 636 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
@@ -1338,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) | |||
1338 | * Used by the amdgpu ttm implementation to move pages if | 1339 | * Used by the amdgpu ttm implementation to move pages if |
1339 | * registered as the asic copy callback. | 1340 | * registered as the asic copy callback. |
1340 | */ | 1341 | */ |
1341 | static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring, | 1342 | static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, |
1342 | uint64_t src_offset, | 1343 | uint64_t src_offset, |
1343 | uint64_t dst_offset, | 1344 | uint64_t dst_offset, |
1344 | uint32_t byte_count) | 1345 | uint32_t byte_count) |
1345 | { | 1346 | { |
1346 | amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); | 1347 | ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); |
1347 | amdgpu_ring_write(ring, byte_count); | 1348 | ib->ptr[ib->length_dw++] = byte_count; |
1348 | amdgpu_ring_write(ring, 0); /* src/dst endian swap */ | 1349 | ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ |
1349 | amdgpu_ring_write(ring, lower_32_bits(src_offset)); | 1350 | ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); |
1350 | amdgpu_ring_write(ring, upper_32_bits(src_offset)); | 1351 | ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); |
1351 | amdgpu_ring_write(ring, lower_32_bits(dst_offset)); | 1352 | ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); |
1352 | amdgpu_ring_write(ring, upper_32_bits(dst_offset)); | 1353 | ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); |
1353 | } | 1354 | } |
1354 | 1355 | ||
1355 | /** | 1356 | /** |
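The CIK copy packet now lands in the IB as seven dwords (header, byte count, endian word, and the split source/destination addresses), which is the per-loop cost that amdgpu_copy_buffer() budgets via copy_num_dw. A small arithmetic sketch of that budget, assuming copy_num_dw is 7 for this engine (an assumption taken from the packet layout above, for illustration only):

    unsigned num_loops = DIV_ROUND_UP(byte_count, max_bytes);
    unsigned num_dw = num_loops * 7; /* assumed copy_num_dw for CIK SDMA */

    /* round up to a multiple of 8 dwords, matching the padding loop in
     * amdgpu_copy_buffer() and the amdgpu_vm_pad_ib() call before submit */
    while (num_dw & 0x7)
            num_dw++;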
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 9b0cab413677..fab7b236f37f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | |||
@@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) | |||
2660 | return r; | 2660 | return r; |
2661 | } | 2661 | } |
2662 | WREG32(scratch, 0xCAFEDEAD); | 2662 | WREG32(scratch, 0xCAFEDEAD); |
2663 | memset(&ib, 0, sizeof(ib)); | ||
2663 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 2664 | r = amdgpu_ib_get(ring, NULL, 256, &ib); |
2664 | if (r) { | 2665 | if (r) { |
2665 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 2666 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4b68e6306f40..818edb37fa9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) | |||
622 | return r; | 622 | return r; |
623 | } | 623 | } |
624 | WREG32(scratch, 0xCAFEDEAD); | 624 | WREG32(scratch, 0xCAFEDEAD); |
625 | memset(&ib, 0, sizeof(ib)); | ||
625 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 626 | r = amdgpu_ib_get(ring, NULL, 256, &ib); |
626 | if (r) { | 627 | if (r) { |
627 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 628 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 9de8104eddeb..715e02d3bfba 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | |||
@@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) | |||
689 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 689 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
690 | tmp = 0xCAFEDEAD; | 690 | tmp = 0xCAFEDEAD; |
691 | adev->wb.wb[index] = cpu_to_le32(tmp); | 691 | adev->wb.wb[index] = cpu_to_le32(tmp); |
692 | memset(&ib, 0, sizeof(ib)); | ||
692 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 693 | r = amdgpu_ib_get(ring, NULL, 256, &ib); |
693 | if (r) { | 694 | if (r) { |
694 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 695 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
@@ -1349,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) | |||
1349 | * Used by the amdgpu ttm implementation to move pages if | 1350 | * Used by the amdgpu ttm implementation to move pages if |
1350 | * registered as the asic copy callback. | 1351 | * registered as the asic copy callback. |
1351 | */ | 1352 | */ |
1352 | static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, | 1353 | static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, |
1353 | uint64_t src_offset, | 1354 | uint64_t src_offset, |
1354 | uint64_t dst_offset, | 1355 | uint64_t dst_offset, |
1355 | uint32_t byte_count) | 1356 | uint32_t byte_count) |
1356 | { | 1357 | { |
1357 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | | 1358 | ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | |
1358 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); | 1359 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); |
1359 | amdgpu_ring_write(ring, byte_count); | 1360 | ib->ptr[ib->length_dw++] = byte_count; |
1360 | amdgpu_ring_write(ring, 0); /* src/dst endian swap */ | 1361 | ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ |
1361 | amdgpu_ring_write(ring, lower_32_bits(src_offset)); | 1362 | ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); |
1362 | amdgpu_ring_write(ring, upper_32_bits(src_offset)); | 1363 | ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); |
1363 | amdgpu_ring_write(ring, lower_32_bits(dst_offset)); | 1364 | ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); |
1364 | amdgpu_ring_write(ring, upper_32_bits(dst_offset)); | 1365 | ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); |
1365 | } | 1366 | } |
1366 | 1367 | ||
1367 | /** | 1368 | /** |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 029f3455f9f9..67128c8e78b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | |||
@@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) | |||
810 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 810 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
811 | tmp = 0xCAFEDEAD; | 811 | tmp = 0xCAFEDEAD; |
812 | adev->wb.wb[index] = cpu_to_le32(tmp); | 812 | adev->wb.wb[index] = cpu_to_le32(tmp); |
813 | memset(&ib, 0, sizeof(ib)); | ||
813 | r = amdgpu_ib_get(ring, NULL, 256, &ib); | 814 | r = amdgpu_ib_get(ring, NULL, 256, &ib); |
814 | if (r) { | 815 | if (r) { |
815 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); | 816 | DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); |
@@ -1473,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) | |||
1473 | * Used by the amdgpu ttm implementation to move pages if | 1474 | * Used by the amdgpu ttm implementation to move pages if |
1474 | * registered as the asic copy callback. | 1475 | * registered as the asic copy callback. |
1475 | */ | 1476 | */ |
1476 | static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring, | 1477 | static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, |
1477 | uint64_t src_offset, | 1478 | uint64_t src_offset, |
1478 | uint64_t dst_offset, | 1479 | uint64_t dst_offset, |
1479 | uint32_t byte_count) | 1480 | uint32_t byte_count) |
1480 | { | 1481 | { |
1481 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | | 1482 | ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | |
1482 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); | 1483 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); |
1483 | amdgpu_ring_write(ring, byte_count); | 1484 | ib->ptr[ib->length_dw++] = byte_count; |
1484 | amdgpu_ring_write(ring, 0); /* src/dst endian swap */ | 1485 | ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ |
1485 | amdgpu_ring_write(ring, lower_32_bits(src_offset)); | 1486 | ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); |
1486 | amdgpu_ring_write(ring, upper_32_bits(src_offset)); | 1487 | ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); |
1487 | amdgpu_ring_write(ring, lower_32_bits(dst_offset)); | 1488 | ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); |
1488 | amdgpu_ring_write(ring, upper_32_bits(dst_offset)); | 1489 | ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); |
1489 | } | 1490 | } |
1490 | 1491 | ||
1491 | /** | 1492 | /** |
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 265d3e2f63cc..d99fe90991dc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | |||
@@ -27,30 +27,32 @@ | |||
27 | #include <drm/drmP.h> | 27 | #include <drm/drmP.h> |
28 | #include "gpu_scheduler.h" | 28 | #include "gpu_scheduler.h" |
29 | 29 | ||
30 | static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); | ||
31 | |||
30 | /* Initialize a given run queue struct */ | 32 | /* Initialize a given run queue struct */ |
31 | static void amd_sched_rq_init(struct amd_sched_rq *rq) | 33 | static void amd_sched_rq_init(struct amd_sched_rq *rq) |
32 | { | 34 | { |
35 | spin_lock_init(&rq->lock); | ||
33 | INIT_LIST_HEAD(&rq->entities); | 36 | INIT_LIST_HEAD(&rq->entities); |
34 | mutex_init(&rq->lock); | ||
35 | rq->current_entity = NULL; | 37 | rq->current_entity = NULL; |
36 | } | 38 | } |
37 | 39 | ||
38 | static void amd_sched_rq_add_entity(struct amd_sched_rq *rq, | 40 | static void amd_sched_rq_add_entity(struct amd_sched_rq *rq, |
39 | struct amd_sched_entity *entity) | 41 | struct amd_sched_entity *entity) |
40 | { | 42 | { |
41 | mutex_lock(&rq->lock); | 43 | spin_lock(&rq->lock); |
42 | list_add_tail(&entity->list, &rq->entities); | 44 | list_add_tail(&entity->list, &rq->entities); |
43 | mutex_unlock(&rq->lock); | 45 | spin_unlock(&rq->lock); |
44 | } | 46 | } |
45 | 47 | ||
46 | static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, | 48 | static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, |
47 | struct amd_sched_entity *entity) | 49 | struct amd_sched_entity *entity) |
48 | { | 50 | { |
49 | mutex_lock(&rq->lock); | 51 | spin_lock(&rq->lock); |
50 | list_del_init(&entity->list); | 52 | list_del_init(&entity->list); |
51 | if (rq->current_entity == entity) | 53 | if (rq->current_entity == entity) |
52 | rq->current_entity = NULL; | 54 | rq->current_entity = NULL; |
53 | mutex_unlock(&rq->lock); | 55 | spin_unlock(&rq->lock); |
54 | } | 56 | } |
55 | 57 | ||
56 | /** | 58 | /** |
@@ -61,12 +63,16 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, | |||
61 | static struct amd_sched_entity * | 63 | static struct amd_sched_entity * |
62 | amd_sched_rq_select_entity(struct amd_sched_rq *rq) | 64 | amd_sched_rq_select_entity(struct amd_sched_rq *rq) |
63 | { | 65 | { |
64 | struct amd_sched_entity *entity = rq->current_entity; | 66 | struct amd_sched_entity *entity; |
67 | |||
68 | spin_lock(&rq->lock); | ||
65 | 69 | ||
70 | entity = rq->current_entity; | ||
66 | if (entity) { | 71 | if (entity) { |
67 | list_for_each_entry_continue(entity, &rq->entities, list) { | 72 | list_for_each_entry_continue(entity, &rq->entities, list) { |
68 | if (!kfifo_is_empty(&entity->job_queue)) { | 73 | if (!kfifo_is_empty(&entity->job_queue)) { |
69 | rq->current_entity = entity; | 74 | rq->current_entity = entity; |
75 | spin_unlock(&rq->lock); | ||
70 | return rq->current_entity; | 76 | return rq->current_entity; |
71 | } | 77 | } |
72 | } | 78 | } |
@@ -76,6 +82,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq) | |||
76 | 82 | ||
77 | if (!kfifo_is_empty(&entity->job_queue)) { | 83 | if (!kfifo_is_empty(&entity->job_queue)) { |
78 | rq->current_entity = entity; | 84 | rq->current_entity = entity; |
85 | spin_unlock(&rq->lock); | ||
79 | return rq->current_entity; | 86 | return rq->current_entity; |
80 | } | 87 | } |
81 | 88 | ||
@@ -83,76 +90,9 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq) | |||
83 | break; | 90 | break; |
84 | } | 91 | } |
85 | 92 | ||
86 | return NULL; | 93 | spin_unlock(&rq->lock); |
87 | } | ||
88 | 94 | ||
89 | /** | 95 | return NULL; |
90 | * Note: This function should only been called inside scheduler main | ||
91 | * function for thread safety, there is no other protection here. | ||
92 | * return ture if scheduler has something ready to run. | ||
93 | * | ||
94 | * For active_hw_rq, there is only one producer(scheduler thread) and | ||
95 | * one consumer(ISR). It should be safe to use this function in scheduler | ||
96 | * main thread to decide whether to continue emit more IBs. | ||
97 | */ | ||
98 | static bool is_scheduler_ready(struct amd_gpu_scheduler *sched) | ||
99 | { | ||
100 | unsigned long flags; | ||
101 | bool full; | ||
102 | |||
103 | spin_lock_irqsave(&sched->queue_lock, flags); | ||
104 | full = atomic64_read(&sched->hw_rq_count) < | ||
105 | sched->hw_submission_limit ? true : false; | ||
106 | spin_unlock_irqrestore(&sched->queue_lock, flags); | ||
107 | |||
108 | return full; | ||
109 | } | ||
110 | |||
111 | /** | ||
112 | * Select next entity from the kernel run queue, if not available, | ||
113 | * return null. | ||
114 | */ | ||
115 | static struct amd_sched_entity * | ||
116 | kernel_rq_select_context(struct amd_gpu_scheduler *sched) | ||
117 | { | ||
118 | struct amd_sched_entity *sched_entity; | ||
119 | struct amd_sched_rq *rq = &sched->kernel_rq; | ||
120 | |||
121 | mutex_lock(&rq->lock); | ||
122 | sched_entity = amd_sched_rq_select_entity(rq); | ||
123 | mutex_unlock(&rq->lock); | ||
124 | return sched_entity; | ||
125 | } | ||
126 | |||
127 | /** | ||
128 | * Select next entity containing real IB submissions | ||
129 | */ | ||
130 | static struct amd_sched_entity * | ||
131 | select_context(struct amd_gpu_scheduler *sched) | ||
132 | { | ||
133 | struct amd_sched_entity *wake_entity = NULL; | ||
134 | struct amd_sched_entity *tmp; | ||
135 | struct amd_sched_rq *rq; | ||
136 | |||
137 | if (!is_scheduler_ready(sched)) | ||
138 | return NULL; | ||
139 | |||
140 | /* Kernel run queue has higher priority than normal run queue*/ | ||
141 | tmp = kernel_rq_select_context(sched); | ||
142 | if (tmp != NULL) | ||
143 | goto exit; | ||
144 | |||
145 | rq = &sched->sched_rq; | ||
146 | mutex_lock(&rq->lock); | ||
147 | tmp = amd_sched_rq_select_entity(rq); | ||
148 | mutex_unlock(&rq->lock); | ||
149 | exit: | ||
150 | if (sched->current_entity && (sched->current_entity != tmp)) | ||
151 | wake_entity = sched->current_entity; | ||
152 | sched->current_entity = tmp; | ||
153 | if (wake_entity && wake_entity->need_wakeup) | ||
154 | wake_up(&wake_entity->wait_queue); | ||
155 | return tmp; | ||
156 | } | 96 | } |
157 | 97 | ||
158 | /** | 98 | /** |
@@ -171,31 +111,20 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, | |||
171 | struct amd_sched_rq *rq, | 111 | struct amd_sched_rq *rq, |
172 | uint32_t jobs) | 112 | uint32_t jobs) |
173 | { | 113 | { |
174 | uint64_t seq_ring = 0; | ||
175 | char name[20]; | ||
176 | |||
177 | if (!(sched && entity && rq)) | 114 | if (!(sched && entity && rq)) |
178 | return -EINVAL; | 115 | return -EINVAL; |
179 | 116 | ||
180 | memset(entity, 0, sizeof(struct amd_sched_entity)); | 117 | memset(entity, 0, sizeof(struct amd_sched_entity)); |
181 | seq_ring = ((uint64_t)sched->ring_id) << 60; | ||
182 | spin_lock_init(&entity->lock); | ||
183 | entity->belongto_rq = rq; | 118 | entity->belongto_rq = rq; |
184 | entity->scheduler = sched; | 119 | entity->scheduler = sched; |
185 | init_waitqueue_head(&entity->wait_queue); | ||
186 | init_waitqueue_head(&entity->wait_emit); | ||
187 | entity->fence_context = fence_context_alloc(1); | 120 | entity->fence_context = fence_context_alloc(1); |
188 | snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context); | ||
189 | memcpy(entity->name, name, 20); | ||
190 | entity->need_wakeup = false; | ||
191 | if(kfifo_alloc(&entity->job_queue, | 121 | if(kfifo_alloc(&entity->job_queue, |
192 | jobs * sizeof(void *), | 122 | jobs * sizeof(void *), |
193 | GFP_KERNEL)) | 123 | GFP_KERNEL)) |
194 | return -EINVAL; | 124 | return -EINVAL; |
195 | 125 | ||
196 | spin_lock_init(&entity->queue_lock); | 126 | spin_lock_init(&entity->queue_lock); |
197 | atomic64_set(&entity->last_queued_v_seq, seq_ring); | 127 | atomic_set(&entity->fence_seq, 0); |
198 | atomic64_set(&entity->last_signaled_v_seq, seq_ring); | ||
199 | 128 | ||
200 | /* Add the entity to the run queue */ | 129 | /* Add the entity to the run queue */ |
201 | amd_sched_rq_add_entity(rq, entity); | 130 | amd_sched_rq_add_entity(rq, entity); |
@@ -210,23 +139,24 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, | |||
210 | * | 139 | * |
211 | * return true if entity is initialized, false otherwise | 140 | * return true if entity is initialized, false otherwise |
212 | */ | 141 | */ |
213 | static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched, | 142 | static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched, |
214 | struct amd_sched_entity *entity) | 143 | struct amd_sched_entity *entity) |
215 | { | 144 | { |
216 | return entity->scheduler == sched && | 145 | return entity->scheduler == sched && |
217 | entity->belongto_rq != NULL; | 146 | entity->belongto_rq != NULL; |
218 | } | 147 | } |
219 | 148 | ||
220 | static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, | 149 | /** |
221 | struct amd_sched_entity *entity) | 150 | * Check if entity is idle |
151 | * | ||
152 | * @entity The pointer to a valid scheduler entity | ||
153 | * | ||
154 | * Return true if entity doesn't have any unscheduled jobs. | ||
155 | */ | ||
156 | static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity) | ||
222 | { | 157 | { |
223 | /** | 158 | rmb(); |
224 | * Idle means no pending IBs, and the entity is not | 159 | if (kfifo_is_empty(&entity->job_queue)) |
225 | * currently being used. | ||
226 | */ | ||
227 | barrier(); | ||
228 | if ((sched->current_entity != entity) && | ||
229 | kfifo_is_empty(&entity->job_queue)) | ||
230 | return true; | 160 | return true; |
231 | 161 | ||
232 | return false; | 162 | return false; |
@@ -238,84 +168,114 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, | |||
238 | * @sched Pointer to scheduler instance | 168 | * @sched Pointer to scheduler instance |
239 | * @entity The pointer to a valid scheduler entity | 169 | * @entity The pointer to a valid scheduler entity |
240 | * | 170 | * |
241 | * return 0 if succeed. negative error code on failure | 171 | * Cleanup and free the allocated resources. |
242 | */ | 172 | */ |
243 | int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, | 173 | void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, |
244 | struct amd_sched_entity *entity) | 174 | struct amd_sched_entity *entity) |
245 | { | 175 | { |
246 | int r = 0; | ||
247 | struct amd_sched_rq *rq = entity->belongto_rq; | 176 | struct amd_sched_rq *rq = entity->belongto_rq; |
248 | 177 | ||
249 | if (!is_context_entity_initialized(sched, entity)) | 178 | if (!amd_sched_entity_is_initialized(sched, entity)) |
250 | return 0; | 179 | return; |
251 | entity->need_wakeup = true; | 180 | |
252 | /** | 181 | /** |
253 | * The client will not queue more IBs during this fini, consume existing | 182 | * The client will not queue more IBs during this fini, consume existing |
254 | * queued IBs | 183 | * queued IBs |
255 | */ | 184 | */ |
256 | r = wait_event_timeout( | 185 | wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); |
257 | entity->wait_queue, | ||
258 | is_context_entity_idle(sched, entity), | ||
259 | msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS) | ||
260 | ) ? 0 : -1; | ||
261 | |||
262 | if (r) { | ||
263 | if (entity->is_pending) | ||
264 | DRM_INFO("Entity %p is in waiting state during fini,\ | ||
265 | all pending ibs will be canceled.\n", | ||
266 | entity); | ||
267 | } | ||
268 | 186 | ||
269 | amd_sched_rq_remove_entity(rq, entity); | 187 | amd_sched_rq_remove_entity(rq, entity); |
270 | kfifo_free(&entity->job_queue); | 188 | kfifo_free(&entity->job_queue); |
271 | return r; | ||
272 | } | 189 | } |
273 | 190 | ||
274 | /** | 191 | /** |
275 | * Submit a normal job to the job queue | 192 | * Helper to submit a job to the job queue |
276 | * | 193 | * |
277 | * @sched The pointer to the scheduler | ||
278 | * @c_entity The pointer to amd_sched_entity | ||
279 | * @job The pointer to job required to submit | 194 | * @job The pointer to job required to submit |
280 | * return 0 if succeed. -1 if failed. | 195 | * |
281 | * -2 indicate queue is full for this client, client should wait untill | 196 | * Returns true if we could submit the job. |
282 | * scheduler consum some queued command. | 197 | */ |
283 | * -1 other fail. | 198 | static bool amd_sched_entity_in(struct amd_sched_job *job) |
284 | */ | 199 | { |
285 | int amd_sched_push_job(struct amd_gpu_scheduler *sched, | 200 | struct amd_sched_entity *entity = job->s_entity; |
286 | struct amd_sched_entity *c_entity, | 201 | bool added, first = false; |
287 | void *data, | 202 | |
288 | struct amd_sched_fence **fence) | 203 | spin_lock(&entity->queue_lock); |
204 | added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job); | ||
205 | |||
206 | if (added && kfifo_len(&entity->job_queue) == sizeof(job)) | ||
207 | first = true; | ||
208 | |||
209 | spin_unlock(&entity->queue_lock); | ||
210 | |||
211 | /* first job wakes up scheduler */ | ||
212 | if (first) | ||
213 | amd_sched_wakeup(job->sched); | ||
214 | |||
215 | return added; | ||
216 | } | ||
217 | |||
218 | /** | ||
219 | * Submit a job to the job queue | ||
220 | * | ||
221 | * @job The pointer to job required to submit | ||
222 | * | ||
223 | * Returns 0 for success, negative error code otherwise. | ||
224 | */ | ||
225 | int amd_sched_entity_push_job(struct amd_sched_job *sched_job) | ||
289 | { | 226 | { |
290 | struct amd_sched_job *job; | 227 | struct amd_sched_entity *entity = sched_job->s_entity; |
228 | struct amd_sched_fence *fence = amd_sched_fence_create( | ||
229 | entity, sched_job->owner); | ||
230 | int r; | ||
291 | 231 | ||
292 | if (!fence) | 232 | if (!fence) |
293 | return -EINVAL; | ||
294 | job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL); | ||
295 | if (!job) | ||
296 | return -ENOMEM; | 233 | return -ENOMEM; |
297 | job->sched = sched; | 234 | |
298 | job->s_entity = c_entity; | 235 | fence_get(&fence->base); |
299 | job->data = data; | 236 | sched_job->s_fence = fence; |
300 | *fence = amd_sched_fence_create(c_entity); | 237 | |
301 | if ((*fence) == NULL) { | 238 | r = wait_event_interruptible(entity->scheduler->job_scheduled, |
302 | kfree(job); | 239 | amd_sched_entity_in(sched_job)); |
303 | return -EINVAL; | 240 | |
304 | } | 241 | return r; |
305 | fence_get(&(*fence)->base); | 242 | } |
306 | job->s_fence = *fence; | 243 | |
307 | while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), | 244 | /** |
308 | &c_entity->queue_lock) != sizeof(void *)) { | 245 | * Return true if we can push more jobs to the hw. |
309 | /** | 246 | */ |
310 | * Current context used up all its IB slots | 247 | static bool amd_sched_ready(struct amd_gpu_scheduler *sched) |
311 | * wait here, or need to check whether GPU is hung | 248 | { |
312 | */ | 249 | return atomic_read(&sched->hw_rq_count) < |
313 | schedule(); | 250 | sched->hw_submission_limit; |
314 | } | 251 | } |
315 | /* first job wake up scheduler */ | 252 | |
316 | if ((kfifo_len(&c_entity->job_queue) / sizeof(void *)) == 1) | 253 | /** |
317 | wake_up_interruptible(&sched->wait_queue); | 254 | * Wake up the scheduler when it is ready |
318 | return 0; | 255 | */ |
256 | static void amd_sched_wakeup(struct amd_gpu_scheduler *sched) | ||
257 | { | ||
258 | if (amd_sched_ready(sched)) | ||
259 | wake_up_interruptible(&sched->wake_up_worker); | ||
260 | } | ||
261 | |||
262 | /** | ||
263 | * Select next entity containing real IB submissions | ||
264 | */ | ||
265 | static struct amd_sched_entity * | ||
266 | amd_sched_select_context(struct amd_gpu_scheduler *sched) | ||
267 | { | ||
268 | struct amd_sched_entity *tmp; | ||
269 | |||
270 | if (!amd_sched_ready(sched)) | ||
271 | return NULL; | ||
272 | |||
273 | /* Kernel run queue has higher priority than normal run queue*/ | ||
274 | tmp = amd_sched_rq_select_entity(&sched->kernel_rq); | ||
275 | if (tmp == NULL) | ||
276 | tmp = amd_sched_rq_select_entity(&sched->sched_rq); | ||
277 | |||
278 | return tmp; | ||
319 | } | 279 | } |
320 | 280 | ||
321 | static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) | 281 | static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) |
@@ -323,52 +283,41 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) | |||
323 | struct amd_sched_job *sched_job = | 283 | struct amd_sched_job *sched_job = |
324 | container_of(cb, struct amd_sched_job, cb); | 284 | container_of(cb, struct amd_sched_job, cb); |
325 | struct amd_gpu_scheduler *sched; | 285 | struct amd_gpu_scheduler *sched; |
326 | unsigned long flags; | ||
327 | 286 | ||
328 | sched = sched_job->sched; | 287 | sched = sched_job->sched; |
329 | atomic64_set(&sched_job->s_entity->last_signaled_v_seq, | ||
330 | sched_job->s_fence->v_seq); | ||
331 | amd_sched_fence_signal(sched_job->s_fence); | 288 | amd_sched_fence_signal(sched_job->s_fence); |
332 | spin_lock_irqsave(&sched->queue_lock, flags); | 289 | atomic_dec(&sched->hw_rq_count); |
333 | list_del(&sched_job->list); | ||
334 | atomic64_dec(&sched->hw_rq_count); | ||
335 | spin_unlock_irqrestore(&sched->queue_lock, flags); | ||
336 | |||
337 | sched->ops->process_job(sched, sched_job); | ||
338 | fence_put(&sched_job->s_fence->base); | 290 | fence_put(&sched_job->s_fence->base); |
339 | kfree(sched_job); | 291 | sched->ops->process_job(sched_job); |
340 | wake_up_interruptible(&sched->wait_queue); | 292 | wake_up_interruptible(&sched->wake_up_worker); |
341 | } | 293 | } |
342 | 294 | ||
343 | static int amd_sched_main(void *param) | 295 | static int amd_sched_main(void *param) |
344 | { | 296 | { |
345 | int r; | ||
346 | struct amd_sched_job *job; | ||
347 | struct sched_param sparam = {.sched_priority = 1}; | 297 | struct sched_param sparam = {.sched_priority = 1}; |
348 | struct amd_sched_entity *c_entity = NULL; | ||
349 | struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; | 298 | struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; |
299 | int r; | ||
350 | 300 | ||
351 | sched_setscheduler(current, SCHED_FIFO, &sparam); | 301 | sched_setscheduler(current, SCHED_FIFO, &sparam); |
352 | 302 | ||
353 | while (!kthread_should_stop()) { | 303 | while (!kthread_should_stop()) { |
304 | struct amd_sched_entity *c_entity = NULL; | ||
305 | struct amd_sched_job *job; | ||
354 | struct fence *fence; | 306 | struct fence *fence; |
355 | 307 | ||
356 | wait_event_interruptible(sched->wait_queue, | 308 | wait_event_interruptible(sched->wake_up_worker, |
357 | is_scheduler_ready(sched) && | 309 | kthread_should_stop() || |
358 | (c_entity = select_context(sched))); | 310 | (c_entity = amd_sched_select_context(sched))); |
311 | |||
312 | if (!c_entity) | ||
313 | continue; | ||
314 | |||
359 | r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *)); | 315 | r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *)); |
360 | if (r != sizeof(void *)) | 316 | if (r != sizeof(void *)) |
361 | continue; | 317 | continue; |
362 | r = sched->ops->prepare_job(sched, c_entity, job); | 318 | atomic_inc(&sched->hw_rq_count); |
363 | if (!r) { | 319 | |
364 | unsigned long flags; | 320 | fence = sched->ops->run_job(job); |
365 | spin_lock_irqsave(&sched->queue_lock, flags); | ||
366 | list_add_tail(&job->list, &sched->active_hw_rq); | ||
367 | atomic64_inc(&sched->hw_rq_count); | ||
368 | spin_unlock_irqrestore(&sched->queue_lock, flags); | ||
369 | } | ||
370 | mutex_lock(&sched->sched_lock); | ||
371 | fence = sched->ops->run_job(sched, c_entity, job); | ||
372 | if (fence) { | 321 | if (fence) { |
373 | r = fence_add_callback(fence, &job->cb, | 322 | r = fence_add_callback(fence, &job->cb, |
374 | amd_sched_process_job); | 323 | amd_sched_process_job); |
@@ -378,7 +327,8 @@ static int amd_sched_main(void *param) | |||
378 | DRM_ERROR("fence add callback failed (%d)\n", r); | 327 | DRM_ERROR("fence add callback failed (%d)\n", r); |
379 | fence_put(fence); | 328 | fence_put(fence); |
380 | } | 329 | } |
381 | mutex_unlock(&sched->sched_lock); | 330 | |
331 | wake_up(&sched->job_scheduled); | ||
382 | } | 332 | } |
383 | return 0; | 333 | return 0; |
384 | } | 334 | } |
@@ -386,53 +336,42 @@ static int amd_sched_main(void *param) | |||
386 | /** | 336 | /** |
387 | * Create a gpu scheduler | 337 | * Create a gpu scheduler |
388 | * | 338 | * |
389 | * @device The device context for this scheduler | 339 | * @ops The backend operations for this scheduler. |
390 | * @ops The backend operations for this scheduler. | 340 | * @ring The ring id for the scheduler. |
391 | * @id The scheduler is per ring, here is ring id. | 341 | * @hw_submission Number of hw submissions to do. |
392 | * @granularity The minumum ms unit the scheduler will scheduled. | ||
393 | * @preemption Indicate whether this ring support preemption, 0 is no. | ||
394 | * | 342 | * |
395 | * return the pointer to scheduler for success, otherwise return NULL | 343 | * Return the pointer to the scheduler on success, otherwise return NULL |
396 | */ | 344 | */ |
397 | struct amd_gpu_scheduler *amd_sched_create(void *device, | 345 | struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops, |
398 | struct amd_sched_backend_ops *ops, | 346 | unsigned ring, unsigned hw_submission, |
399 | unsigned ring, | 347 | void *priv) |
400 | unsigned granularity, | ||
401 | unsigned preemption, | ||
402 | unsigned hw_submission) | ||
403 | { | 348 | { |
404 | struct amd_gpu_scheduler *sched; | 349 | struct amd_gpu_scheduler *sched; |
405 | char name[20]; | ||
406 | 350 | ||
407 | sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); | 351 | sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); |
408 | if (!sched) | 352 | if (!sched) |
409 | return NULL; | 353 | return NULL; |
410 | 354 | ||
411 | sched->device = device; | ||
412 | sched->ops = ops; | 355 | sched->ops = ops; |
413 | sched->granularity = granularity; | ||
414 | sched->ring_id = ring; | 356 | sched->ring_id = ring; |
415 | sched->preemption = preemption; | ||
416 | sched->hw_submission_limit = hw_submission; | 357 | sched->hw_submission_limit = hw_submission; |
417 | snprintf(name, sizeof(name), "gpu_sched[%d]", ring); | 358 | sched->priv = priv; |
418 | mutex_init(&sched->sched_lock); | 359 | snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring); |
419 | spin_lock_init(&sched->queue_lock); | ||
420 | amd_sched_rq_init(&sched->sched_rq); | 360 | amd_sched_rq_init(&sched->sched_rq); |
421 | amd_sched_rq_init(&sched->kernel_rq); | 361 | amd_sched_rq_init(&sched->kernel_rq); |
422 | 362 | ||
423 | init_waitqueue_head(&sched->wait_queue); | 363 | init_waitqueue_head(&sched->wake_up_worker); |
424 | INIT_LIST_HEAD(&sched->active_hw_rq); | 364 | init_waitqueue_head(&sched->job_scheduled); |
425 | atomic64_set(&sched->hw_rq_count, 0); | 365 | atomic_set(&sched->hw_rq_count, 0); |
426 | /* Each scheduler will run on a separate kernel thread */ | 366 | /* Each scheduler will run on a separate kernel thread */ |
427 | sched->thread = kthread_create(amd_sched_main, sched, name); | 367 | sched->thread = kthread_run(amd_sched_main, sched, sched->name); |
428 | if (sched->thread) { | 368 | if (IS_ERR(sched->thread)) { |
429 | wake_up_process(sched->thread); | 369 | DRM_ERROR("Failed to create scheduler for id %d.\n", ring); |
430 | return sched; | 370 | kfree(sched); |
371 | return NULL; | ||
431 | } | 372 | } |
432 | 373 | ||
433 | DRM_ERROR("Failed to create scheduler for id %d.\n", ring); | 374 | return sched; |
434 | kfree(sched); | ||
435 | return NULL; | ||
436 | } | 375 | } |
437 | 376 | ||
438 | /** | 377 | /** |
@@ -448,15 +387,3 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched) | |||
448 | kfree(sched); | 387 | kfree(sched); |
449 | return 0; | 388 | return 0; |
450 | } | 389 | } |
451 | |||
452 | /** | ||
453 | * Get next queued sequence number | ||
454 | * | ||
455 | * @entity The context entity | ||
456 | * | ||
457 | * return the next queued sequence number | ||
458 | */ | ||
459 | uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity) | ||
460 | { | ||
461 | return atomic64_read(&c_entity->last_queued_v_seq) + 1; | ||
462 | } | ||
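With the reworked loop above, amd_sched_main() pops a job from the selected entity's kfifo, bumps hw_rq_count, calls the backend's run_job() and attaches amd_sched_process_job() to the fence it returns; process_job() then runs once that fence signals. A minimal sketch of a matching backend using the new single-argument callbacks from gpu_scheduler.h is shown below; the my_backend_* and my_ring_* names are illustrative placeholders, not code from this series.

static struct fence *my_backend_run_job(struct amd_sched_job *job)
{
	/* push the job to the hardware ring and hand back the hw fence;
	 * the scheduler adds amd_sched_process_job() as its callback */
	return my_ring_submit(job);		/* assumed driver helper */
}

static void my_backend_process_job(struct amd_sched_job *job)
{
	/* invoked from amd_sched_process_job() after the hw fence signals;
	 * release whatever run_job() pinned for this submission */
	my_ring_cleanup(job);			/* assumed driver helper */
}

static struct amd_sched_backend_ops my_backend_ops = {
	.run_job	= my_backend_run_job,
	.process_job	= my_backend_process_job,
};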
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index ceb5918bfbeb..e797796dcad7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | |||
@@ -27,8 +27,6 @@ | |||
27 | #include <linux/kfifo.h> | 27 | #include <linux/kfifo.h> |
28 | #include <linux/fence.h> | 28 | #include <linux/fence.h> |
29 | 29 | ||
30 | #define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 | ||
31 | |||
32 | struct amd_gpu_scheduler; | 30 | struct amd_gpu_scheduler; |
33 | struct amd_sched_rq; | 31 | struct amd_sched_rq; |
34 | 32 | ||
@@ -41,20 +39,12 @@ struct amd_sched_rq; | |||
41 | struct amd_sched_entity { | 39 | struct amd_sched_entity { |
42 | struct list_head list; | 40 | struct list_head list; |
43 | struct amd_sched_rq *belongto_rq; | 41 | struct amd_sched_rq *belongto_rq; |
44 | spinlock_t lock; | 42 | atomic_t fence_seq; |
45 | /* the virtual_seq is unique per context per ring */ | ||
46 | atomic64_t last_queued_v_seq; | ||
47 | atomic64_t last_signaled_v_seq; | ||
48 | /* the job_queue maintains the jobs submitted by clients */ | 43 | /* the job_queue maintains the jobs submitted by clients */ |
49 | struct kfifo job_queue; | 44 | struct kfifo job_queue; |
50 | spinlock_t queue_lock; | 45 | spinlock_t queue_lock; |
51 | struct amd_gpu_scheduler *scheduler; | 46 | struct amd_gpu_scheduler *scheduler; |
52 | wait_queue_head_t wait_queue; | ||
53 | wait_queue_head_t wait_emit; | ||
54 | bool is_pending; | ||
55 | uint64_t fence_context; | 47 | uint64_t fence_context; |
56 | char name[20]; | ||
57 | bool need_wakeup; | ||
58 | }; | 48 | }; |
59 | 49 | ||
60 | /** | 50 | /** |
@@ -63,26 +53,24 @@ struct amd_sched_entity { | |||
63 | * the next entity to emit commands from. | 53 | * the next entity to emit commands from. |
64 | */ | 54 | */ |
65 | struct amd_sched_rq { | 55 | struct amd_sched_rq { |
66 | struct mutex lock; | 56 | spinlock_t lock; |
67 | struct list_head entities; | 57 | struct list_head entities; |
68 | struct amd_sched_entity *current_entity; | 58 | struct amd_sched_entity *current_entity; |
69 | }; | 59 | }; |
70 | 60 | ||
71 | struct amd_sched_fence { | 61 | struct amd_sched_fence { |
72 | struct fence base; | 62 | struct fence base; |
73 | struct fence_cb cb; | 63 | struct amd_gpu_scheduler *scheduler; |
74 | struct amd_sched_entity *entity; | ||
75 | uint64_t v_seq; | ||
76 | spinlock_t lock; | 64 | spinlock_t lock; |
65 | void *owner; | ||
77 | }; | 66 | }; |
78 | 67 | ||
79 | struct amd_sched_job { | 68 | struct amd_sched_job { |
80 | struct list_head list; | ||
81 | struct fence_cb cb; | 69 | struct fence_cb cb; |
82 | struct amd_gpu_scheduler *sched; | 70 | struct amd_gpu_scheduler *sched; |
83 | struct amd_sched_entity *s_entity; | 71 | struct amd_sched_entity *s_entity; |
84 | void *data; | ||
85 | struct amd_sched_fence *s_fence; | 72 | struct amd_sched_fence *s_fence; |
73 | void *owner; | ||
86 | }; | 74 | }; |
87 | 75 | ||
88 | extern const struct fence_ops amd_sched_fence_ops; | 76 | extern const struct fence_ops amd_sched_fence_ops; |
@@ -101,61 +89,42 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) | |||
101 | * these functions should be implemented on the driver side | 89 | * these functions should be implemented on the driver side |
102 | */ | 90 | */ |
103 | struct amd_sched_backend_ops { | 91 | struct amd_sched_backend_ops { |
104 | int (*prepare_job)(struct amd_gpu_scheduler *sched, | 92 | struct fence *(*run_job)(struct amd_sched_job *job); |
105 | struct amd_sched_entity *c_entity, | 93 | void (*process_job)(struct amd_sched_job *job); |
106 | struct amd_sched_job *job); | ||
107 | struct fence *(*run_job)(struct amd_gpu_scheduler *sched, | ||
108 | struct amd_sched_entity *c_entity, | ||
109 | struct amd_sched_job *job); | ||
110 | void (*process_job)(struct amd_gpu_scheduler *sched, | ||
111 | struct amd_sched_job *job); | ||
112 | }; | 94 | }; |
113 | 95 | ||
114 | /** | 96 | /** |
115 | * One scheduler is implemented for each hardware ring | 97 | * One scheduler is implemented for each hardware ring |
116 | */ | 98 | */ |
117 | struct amd_gpu_scheduler { | 99 | struct amd_gpu_scheduler { |
118 | void *device; | ||
119 | struct task_struct *thread; | 100 | struct task_struct *thread; |
120 | struct amd_sched_rq sched_rq; | 101 | struct amd_sched_rq sched_rq; |
121 | struct amd_sched_rq kernel_rq; | 102 | struct amd_sched_rq kernel_rq; |
122 | struct list_head active_hw_rq; | 103 | atomic_t hw_rq_count; |
123 | atomic64_t hw_rq_count; | ||
124 | struct amd_sched_backend_ops *ops; | 104 | struct amd_sched_backend_ops *ops; |
125 | uint32_t ring_id; | 105 | uint32_t ring_id; |
126 | uint32_t granularity; /* in ms unit */ | 106 | wait_queue_head_t wake_up_worker; |
127 | uint32_t preemption; | 107 | wait_queue_head_t job_scheduled; |
128 | wait_queue_head_t wait_queue; | ||
129 | struct amd_sched_entity *current_entity; | ||
130 | struct mutex sched_lock; | ||
131 | spinlock_t queue_lock; | ||
132 | uint32_t hw_submission_limit; | 108 | uint32_t hw_submission_limit; |
109 | char name[20]; | ||
110 | void *priv; | ||
133 | }; | 111 | }; |
134 | 112 | ||
135 | struct amd_gpu_scheduler *amd_sched_create(void *device, | 113 | struct amd_gpu_scheduler * |
136 | struct amd_sched_backend_ops *ops, | 114 | amd_sched_create(struct amd_sched_backend_ops *ops, |
137 | uint32_t ring, | 115 | uint32_t ring, uint32_t hw_submission, void *priv); |
138 | uint32_t granularity, | ||
139 | uint32_t preemption, | ||
140 | uint32_t hw_submission); | ||
141 | int amd_sched_destroy(struct amd_gpu_scheduler *sched); | 116 | int amd_sched_destroy(struct amd_gpu_scheduler *sched); |
142 | 117 | ||
143 | int amd_sched_push_job(struct amd_gpu_scheduler *sched, | ||
144 | struct amd_sched_entity *c_entity, | ||
145 | void *data, | ||
146 | struct amd_sched_fence **fence); | ||
147 | |||
148 | int amd_sched_entity_init(struct amd_gpu_scheduler *sched, | 118 | int amd_sched_entity_init(struct amd_gpu_scheduler *sched, |
149 | struct amd_sched_entity *entity, | 119 | struct amd_sched_entity *entity, |
150 | struct amd_sched_rq *rq, | 120 | struct amd_sched_rq *rq, |
151 | uint32_t jobs); | 121 | uint32_t jobs); |
152 | int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, | 122 | void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, |
153 | struct amd_sched_entity *entity); | 123 | struct amd_sched_entity *entity); |
154 | 124 | int amd_sched_entity_push_job(struct amd_sched_job *sched_job); | |
155 | uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); | ||
156 | 125 | ||
157 | struct amd_sched_fence *amd_sched_fence_create( | 126 | struct amd_sched_fence *amd_sched_fence_create( |
158 | struct amd_sched_entity *s_entity); | 127 | struct amd_sched_entity *s_entity, void *owner); |
159 | void amd_sched_fence_signal(struct amd_sched_fence *fence); | 128 | void amd_sched_fence_signal(struct amd_sched_fence *fence); |
160 | 129 | ||
161 | 130 | ||
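The slimmed-down header leaves a small driver-facing API: create one scheduler per ring, initialise one entity per context, then push jobs. A rough usage sketch against the new prototypes follows; my_backend_ops, MY_RING_ID, the fixed queue depths and the omitted error handling are placeholders for illustration, not part of the patch.

static int my_setup_and_submit(void)
{
	struct amd_gpu_scheduler *sched;
	struct amd_sched_entity entity;
	struct amd_sched_job *job;
	int r;

	sched = amd_sched_create(&my_backend_ops, MY_RING_ID,
				 16 /* hw submission limit */, NULL /* priv */);
	if (!sched)
		return -ENOMEM;

	/* one entity per context/ring pair, feeding the normal run queue */
	r = amd_sched_entity_init(sched, &entity, &sched->sched_rq, 32 /* jobs */);
	if (r)
		return r;

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;
	job->sched    = sched;
	job->s_entity = &entity;
	job->owner    = NULL;	/* e.g. the submitting client's file private */

	/* queues the job; job->s_fence then tracks its completion */
	return amd_sched_entity_push_job(job);
}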
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index a4751598c0b4..e62c37920e11 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c | |||
@@ -27,19 +27,22 @@ | |||
27 | #include <drm/drmP.h> | 27 | #include <drm/drmP.h> |
28 | #include "gpu_scheduler.h" | 28 | #include "gpu_scheduler.h" |
29 | 29 | ||
30 | struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity) | 30 | struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner) |
31 | { | 31 | { |
32 | struct amd_sched_fence *fence = NULL; | 32 | struct amd_sched_fence *fence = NULL; |
33 | unsigned seq; | ||
34 | |||
33 | fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); | 35 | fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); |
34 | if (fence == NULL) | 36 | if (fence == NULL) |
35 | return NULL; | 37 | return NULL; |
36 | fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); | 38 | fence->owner = owner; |
37 | fence->entity = s_entity; | 39 | fence->scheduler = s_entity->scheduler; |
38 | spin_lock_init(&fence->lock); | 40 | spin_lock_init(&fence->lock); |
39 | fence_init(&fence->base, &amd_sched_fence_ops, | 41 | |
40 | &fence->lock, | 42 | seq = atomic_inc_return(&s_entity->fence_seq); |
41 | s_entity->fence_context, | 43 | fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock, |
42 | fence->v_seq); | 44 | s_entity->fence_context, seq); |
45 | |||
43 | return fence; | 46 | return fence; |
44 | } | 47 | } |
45 | 48 | ||
@@ -60,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence) | |||
60 | static const char *amd_sched_fence_get_timeline_name(struct fence *f) | 63 | static const char *amd_sched_fence_get_timeline_name(struct fence *f) |
61 | { | 64 | { |
62 | struct amd_sched_fence *fence = to_amd_sched_fence(f); | 65 | struct amd_sched_fence *fence = to_amd_sched_fence(f); |
63 | return (const char *)fence->entity->name; | 66 | return (const char *)fence->scheduler->name; |
64 | } | 67 | } |
65 | 68 | ||
66 | static bool amd_sched_fence_enable_signaling(struct fence *f) | 69 | static bool amd_sched_fence_enable_signaling(struct fence *f) |
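The fence rework stamps each scheduler fence with the entity's fence_context and a 32-bit sequence number from atomic_inc_return(), replacing the old 64-bit v_seq, and records the owner and scheduler instead of a back pointer to the entity. A hedged sketch of what that gives callers for two fences created on the same entity (fence_is_later() is the generic helper from linux/fence.h; entity and owner stand in for real driver objects):

	struct amd_sched_fence *a = amd_sched_fence_create(entity, owner);
	struct amd_sched_fence *b = amd_sched_fence_create(entity, owner);

	/* same entity => same fence context, later creation => later seqno */
	WARN_ON(a->base.context != b->base.context);
	WARN_ON(!fence_is_later(&b->base, &a->base));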
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 94b21ae70ef7..5a2cafb4f1bc 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c | |||
@@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector) | |||
95 | if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { | 95 | if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { |
96 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); | 96 | drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); |
97 | } else if (radeon_dp_needs_link_train(radeon_connector)) { | 97 | } else if (radeon_dp_needs_link_train(radeon_connector)) { |
98 | /* Don't try to start link training before we | ||
99 | * have the dpcd */ | ||
100 | if (!radeon_dp_getdpcd(radeon_connector)) | ||
101 | return; | ||
102 | |||
98 | /* set it to OFF so that drm_helper_connector_dpms() | 103 | /* set it to OFF so that drm_helper_connector_dpms() |
99 | * won't return immediately since the current state | 104 | * won't return immediately since the current state |
100 | * is ON at this point. | 105 | * is ON at this point. |