author     Dave Airlie <airlied@redhat.com>    2015-08-26 23:00:28 -0400
committer  Dave Airlie <airlied@redhat.com>    2015-08-26 23:00:28 -0400
commit     40b2dffbcc67e92d5df97785dffc68fe88605bfa (patch)
tree       91276b6ae4210791ad4494adaf69a56b16c7b0ac
parent     db56176025cee5e242dfeed5f4e304d095d29fa3 (diff)
parent     c2b6bd7e91aad8440a2f55bdbde6f5a8ae19fac5 (diff)

Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next

- DP fixes for radeon and amdgpu
- IH ring fix for tonga and fiji
- Lots of GPU scheduler fixes
- Misc additional fixes

* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (42 commits)
  drm/amdgpu: fix wait queue handling in the scheduler
  drm/amdgpu: remove extra parameters from scheduler callbacks
  drm/amdgpu: wake up scheduler only when necessary
  drm/amdgpu: remove entity idle timeout v2
  drm/amdgpu: fix postclose order
  drm/amdgpu: use IB for copy buffer of eviction
  drm/amdgpu: adjust the judgement of removing fence callback
  drm/amdgpu: fix no sync_wait in copy_buffer
  drm/amdgpu: fix last_vm_update fence is not effective for sched fence
  drm/amdgpu: add priv data to sched
  drm/amdgpu: add owner for sched fence
  drm/amdgpu: remove entity reference from sched fence
  drm/amdgpu: fix and cleanup amd_sched_entity_push_job
  drm/amdgpu: remove amdgpu_bo_list_clone
  drm/amdgpu: remove the context from amdgpu_job
  drm/amdgpu: remove unused parameters to amd_sched_create
  drm/amdgpu: remove sched_lock
  drm/amdgpu: remove prepare_job callback
  drm/amdgpu: cleanup a scheduler function name
  drm/amdgpu: reorder scheduler functions
  ...
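
For orientation, a minimal sketch (in C, not part of the patch) of how a kernel-internal
submission reaches the scheduler after this rework; it is condensed from the
amdgpu_sched_ib_submit_kernel_helper() hunk below, with locking kept as in the patch and
error paths shortened:

    struct amdgpu_job *job = kzalloc(sizeof(*job), GFP_KERNEL);
    if (!job)
            return -ENOMEM;

    job->base.sched    = ring->scheduler;            /* per-ring scheduler instance */
    job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
    job->base.owner    = owner;
    job->adev          = adev;
    job->ibs           = ibs;
    job->num_ibs       = num_ibs;
    job->free_job      = free_job;                   /* called when the job is retired */
    mutex_init(&job->job_lock);

    mutex_lock(&job->job_lock);
    r = amd_sched_entity_push_job(&job->base);       /* replaces amd_sched_push_job() */
    if (r) {
            mutex_unlock(&job->job_lock);
            kfree(job);
            return r;
    }
    *f = fence_get(&job->base.s_fence->base);        /* scheduler fence, not the HW fence */
    mutex_unlock(&job->job_lock);

The key change is that submissions are now self-contained amdgpu_job objects pushed through
amd_sched_entity_push_job(), instead of amdgpu_cs_parser objects handed to amd_sched_push_job().
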
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 222
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 99
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 94
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 103
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 73
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 19
-rw-r--r--  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 377
-rw-r--r--  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 69
-rw-r--r--  drivers/gpu/drm/amd/scheduler/sched_fence.c | 19
-rw-r--r--  drivers/gpu/drm/radeon/radeon_connectors.c | 5
30 files changed, 671 insertions, 727 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2fc58e658986..aa2dcf578dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -183,6 +183,7 @@ struct amdgpu_vm;
183struct amdgpu_ring; 183struct amdgpu_ring;
184struct amdgpu_semaphore; 184struct amdgpu_semaphore;
185struct amdgpu_cs_parser; 185struct amdgpu_cs_parser;
186struct amdgpu_job;
186struct amdgpu_irq_src; 187struct amdgpu_irq_src;
187struct amdgpu_fpriv; 188struct amdgpu_fpriv;
188 189
@@ -246,7 +247,7 @@ struct amdgpu_buffer_funcs {
246 unsigned copy_num_dw; 247 unsigned copy_num_dw;
247 248
248 /* used for buffer migration */ 249 /* used for buffer migration */
249 void (*emit_copy_buffer)(struct amdgpu_ring *ring, 250 void (*emit_copy_buffer)(struct amdgpu_ib *ib,
250 /* src addr in bytes */ 251 /* src addr in bytes */
251 uint64_t src_offset, 252 uint64_t src_offset,
252 /* dst addr in bytes */ 253 /* dst addr in bytes */
@@ -439,9 +440,12 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
439int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); 440int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
440unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); 441unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
441 442
442signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, 443signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
443 struct amdgpu_fence **fences, 444 struct fence **array,
444 bool intr, long t); 445 uint32_t count,
446 bool wait_all,
447 bool intr,
448 signed long t);
445struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); 449struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
446void amdgpu_fence_unref(struct amdgpu_fence **fence); 450void amdgpu_fence_unref(struct amdgpu_fence **fence);
447 451
@@ -514,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
514 uint64_t dst_offset, 518 uint64_t dst_offset,
515 uint32_t byte_count, 519 uint32_t byte_count,
516 struct reservation_object *resv, 520 struct reservation_object *resv,
517 struct amdgpu_fence **fence); 521 struct fence **fence);
518int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); 522int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
519 523
520struct amdgpu_bo_list_entry { 524struct amdgpu_bo_list_entry {
@@ -650,7 +654,7 @@ struct amdgpu_sa_bo {
650 struct amdgpu_sa_manager *manager; 654 struct amdgpu_sa_manager *manager;
651 unsigned soffset; 655 unsigned soffset;
652 unsigned eoffset; 656 unsigned eoffset;
653 struct amdgpu_fence *fence; 657 struct fence *fence;
654}; 658};
655 659
656/* 660/*
@@ -692,7 +696,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
692 struct amdgpu_semaphore *semaphore); 696 struct amdgpu_semaphore *semaphore);
693void amdgpu_semaphore_free(struct amdgpu_device *adev, 697void amdgpu_semaphore_free(struct amdgpu_device *adev,
694 struct amdgpu_semaphore **semaphore, 698 struct amdgpu_semaphore **semaphore,
695 struct amdgpu_fence *fence); 699 struct fence *fence);
696 700
697/* 701/*
698 * Synchronization 702 * Synchronization
@@ -700,7 +704,8 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
700struct amdgpu_sync { 704struct amdgpu_sync {
701 struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; 705 struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
702 struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS]; 706 struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS];
703 struct amdgpu_fence *last_vm_update; 707 DECLARE_HASHTABLE(fences, 4);
708 struct fence *last_vm_update;
704}; 709};
705 710
706void amdgpu_sync_create(struct amdgpu_sync *sync); 711void amdgpu_sync_create(struct amdgpu_sync *sync);
@@ -712,8 +717,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
712 void *owner); 717 void *owner);
713int amdgpu_sync_rings(struct amdgpu_sync *sync, 718int amdgpu_sync_rings(struct amdgpu_sync *sync,
714 struct amdgpu_ring *ring); 719 struct amdgpu_ring *ring);
720int amdgpu_sync_wait(struct amdgpu_sync *sync);
715void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, 721void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
716 struct amdgpu_fence *fence); 722 struct fence *fence);
717 723
718/* 724/*
719 * GART structures, functions & helpers 725 * GART structures, functions & helpers
@@ -871,7 +877,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
871 struct amdgpu_ring *ring, 877 struct amdgpu_ring *ring,
872 struct amdgpu_ib *ibs, 878 struct amdgpu_ib *ibs,
873 unsigned num_ibs, 879 unsigned num_ibs,
874 int (*free_job)(struct amdgpu_cs_parser *), 880 int (*free_job)(struct amdgpu_job *),
875 void *owner, 881 void *owner,
876 struct fence **fence); 882 struct fence **fence);
877 883
@@ -957,7 +963,7 @@ struct amdgpu_vm_id {
957 unsigned id; 963 unsigned id;
958 uint64_t pd_gpu_addr; 964 uint64_t pd_gpu_addr;
959 /* last flushed PD/PT update */ 965 /* last flushed PD/PT update */
960 struct amdgpu_fence *flushed_updates; 966 struct fence *flushed_updates;
961 /* last use of vmid */ 967 /* last use of vmid */
962 struct amdgpu_fence *last_id_use; 968 struct amdgpu_fence *last_id_use;
963}; 969};
@@ -1042,7 +1048,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
1042int amdgpu_ctx_put(struct amdgpu_ctx *ctx); 1048int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
1043 1049
1044uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, 1050uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
1045 struct fence *fence, uint64_t queued_seq); 1051 struct fence *fence);
1046struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, 1052struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
1047 struct amdgpu_ring *ring, uint64_t seq); 1053 struct amdgpu_ring *ring, uint64_t seq);
1048 1054
@@ -1078,8 +1084,6 @@ struct amdgpu_bo_list {
1078}; 1084};
1079 1085
1080struct amdgpu_bo_list * 1086struct amdgpu_bo_list *
1081amdgpu_bo_list_clone(struct amdgpu_bo_list *list);
1082struct amdgpu_bo_list *
1083amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); 1087amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
1084void amdgpu_bo_list_put(struct amdgpu_bo_list *list); 1088void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
1085void amdgpu_bo_list_free(struct amdgpu_bo_list *list); 1089void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
@@ -1255,14 +1259,16 @@ struct amdgpu_cs_parser {
1255 1259
1256 /* user fence */ 1260 /* user fence */
1257 struct amdgpu_user_fence uf; 1261 struct amdgpu_user_fence uf;
1262};
1258 1263
1259 struct amdgpu_ring *ring; 1264struct amdgpu_job {
1260 struct mutex job_lock; 1265 struct amd_sched_job base;
1261 struct work_struct job_work; 1266 struct amdgpu_device *adev;
1262 int (*prepare_job)(struct amdgpu_cs_parser *sched_job); 1267 struct amdgpu_ib *ibs;
1263 int (*run_job)(struct amdgpu_cs_parser *sched_job); 1268 uint32_t num_ibs;
1264 int (*free_job)(struct amdgpu_cs_parser *sched_job); 1269 struct mutex job_lock;
1265 struct amd_sched_fence *s_fence; 1270 struct amdgpu_user_fence uf;
1271 int (*free_job)(struct amdgpu_job *sched_job);
1266}; 1272};
1267 1273
1268static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) 1274static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
@@ -2241,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
2241#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) 2247#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
2242#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) 2248#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
2243#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) 2249#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
2244#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b)) 2250#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
2245#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) 2251#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
2246#define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) 2252#define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
2247#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) 2253#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
@@ -2343,7 +2349,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
2343 struct amdgpu_sync *sync); 2349 struct amdgpu_sync *sync);
2344void amdgpu_vm_flush(struct amdgpu_ring *ring, 2350void amdgpu_vm_flush(struct amdgpu_ring *ring,
2345 struct amdgpu_vm *vm, 2351 struct amdgpu_vm *vm,
2346 struct amdgpu_fence *updates); 2352 struct fence *updates);
2347void amdgpu_vm_fence(struct amdgpu_device *adev, 2353void amdgpu_vm_fence(struct amdgpu_device *adev,
2348 struct amdgpu_vm *vm, 2354 struct amdgpu_vm *vm,
2349 struct amdgpu_fence *fence); 2355 struct amdgpu_fence *fence);
@@ -2373,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2373 uint64_t addr); 2379 uint64_t addr);
2374void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, 2380void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2375 struct amdgpu_bo_va *bo_va); 2381 struct amdgpu_bo_va *bo_va);
2376 2382int amdgpu_vm_free_job(struct amdgpu_job *job);
2377/* 2383/*
2378 * functions used by amdgpu_encoder.c 2384 * functions used by amdgpu_encoder.c
2379 */ 2385 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 759482e4300d..98d59ee640ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
33{ 33{
34 unsigned long start_jiffies; 34 unsigned long start_jiffies;
35 unsigned long end_jiffies; 35 unsigned long end_jiffies;
36 struct amdgpu_fence *fence = NULL; 36 struct fence *fence = NULL;
37 int i, r; 37 int i, r;
38 38
39 start_jiffies = jiffies; 39 start_jiffies = jiffies;
@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
42 r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence); 42 r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence);
43 if (r) 43 if (r)
44 goto exit_do_move; 44 goto exit_do_move;
45 r = fence_wait(&fence->base, false); 45 r = fence_wait(fence, false);
46 if (r) 46 if (r)
47 goto exit_do_move; 47 goto exit_do_move;
48 amdgpu_fence_unref(&fence); 48 fence_put(fence);
49 } 49 }
50 end_jiffies = jiffies; 50 end_jiffies = jiffies;
51 r = jiffies_to_msecs(end_jiffies - start_jiffies); 51 r = jiffies_to_msecs(end_jiffies - start_jiffies);
52 52
53exit_do_move: 53exit_do_move:
54 if (fence) 54 if (fence)
55 amdgpu_fence_unref(&fence); 55 fence_put(fence);
56 return r; 56 return r;
57} 57}
58 58
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7eed523bf28f..f82a2dd83874 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -62,39 +62,6 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
62 return 0; 62 return 0;
63} 63}
64 64
65struct amdgpu_bo_list *
66amdgpu_bo_list_clone(struct amdgpu_bo_list *list)
67{
68 struct amdgpu_bo_list *result;
69 unsigned i;
70
71 result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
72 if (!result)
73 return NULL;
74
75 result->array = drm_calloc_large(list->num_entries,
76 sizeof(struct amdgpu_bo_list_entry));
77 if (!result->array) {
78 kfree(result);
79 return NULL;
80 }
81
82 mutex_init(&result->lock);
83 result->gds_obj = list->gds_obj;
84 result->gws_obj = list->gws_obj;
85 result->oa_obj = list->oa_obj;
86 result->has_userptr = list->has_userptr;
87 result->num_entries = list->num_entries;
88
89 memcpy(result->array, list->array, list->num_entries *
90 sizeof(struct amdgpu_bo_list_entry));
91
92 for (i = 0; i < result->num_entries; ++i)
93 amdgpu_bo_ref(result->array[i].robj);
94
95 return result;
96}
97
98static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) 65static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
99{ 66{
100 struct amdgpu_bo_list *list; 67 struct amdgpu_bo_list *list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 27df17a0e620..89c3dd62ba21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -75,6 +75,11 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
75 if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { 75 if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
76 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 76 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
77 } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { 77 } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
78 /* Don't try to start link training before we
79 * have the dpcd */
80 if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
81 return;
82
78 /* set it to OFF so that drm_helper_connector_dpms() 83 /* set it to OFF so that drm_helper_connector_dpms()
79 * won't return immediately since the current state 84 * won't return immediately since the current state
80 * is ON at this point. 85 * is ON at this point.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e4424b4db5d3..6a206f15635f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -126,19 +126,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
126 return 0; 126 return 0;
127} 127}
128 128
129static void amdgpu_job_work_func(struct work_struct *work)
130{
131 struct amdgpu_cs_parser *sched_job =
132 container_of(work, struct amdgpu_cs_parser,
133 job_work);
134 mutex_lock(&sched_job->job_lock);
135 if (sched_job->free_job)
136 sched_job->free_job(sched_job);
137 mutex_unlock(&sched_job->job_lock);
138 /* after processing job, free memory */
139 fence_put(&sched_job->s_fence->base);
140 kfree(sched_job);
141}
142struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, 129struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
143 struct drm_file *filp, 130 struct drm_file *filp,
144 struct amdgpu_ctx *ctx, 131 struct amdgpu_ctx *ctx,
@@ -157,10 +144,6 @@ struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
157 parser->ctx = ctx; 144 parser->ctx = ctx;
158 parser->ibs = ibs; 145 parser->ibs = ibs;
159 parser->num_ibs = num_ibs; 146 parser->num_ibs = num_ibs;
160 if (amdgpu_enable_scheduler) {
161 mutex_init(&parser->job_lock);
162 INIT_WORK(&parser->job_work, amdgpu_job_work_func);
163 }
164 for (i = 0; i < num_ibs; i++) 147 for (i = 0; i < num_ibs; i++)
165 ibs[i].ctx = ctx; 148 ibs[i].ctx = ctx;
166 149
@@ -173,7 +156,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
173 uint64_t *chunk_array_user; 156 uint64_t *chunk_array_user;
174 uint64_t *chunk_array = NULL; 157 uint64_t *chunk_array = NULL;
175 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 158 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
176 struct amdgpu_bo_list *bo_list = NULL;
177 unsigned size, i; 159 unsigned size, i;
178 int r = 0; 160 int r = 0;
179 161
@@ -185,20 +167,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
185 r = -EINVAL; 167 r = -EINVAL;
186 goto out; 168 goto out;
187 } 169 }
188 bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); 170 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
189 if (!amdgpu_enable_scheduler)
190 p->bo_list = bo_list;
191 else {
192 if (bo_list && !bo_list->has_userptr) {
193 p->bo_list = amdgpu_bo_list_clone(bo_list);
194 amdgpu_bo_list_put(bo_list);
195 if (!p->bo_list)
196 return -ENOMEM;
197 } else if (bo_list && bo_list->has_userptr)
198 p->bo_list = bo_list;
199 else
200 p->bo_list = NULL;
201 }
202 171
203 /* get chunks */ 172 /* get chunks */
204 INIT_LIST_HEAD(&p->validated); 173 INIT_LIST_HEAD(&p->validated);
@@ -291,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
291 } 260 }
292 261
293 262
294 p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); 263 p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
295 if (!p->ibs) 264 if (!p->ibs)
296 r = -ENOMEM; 265 r = -ENOMEM;
297 266
@@ -498,25 +467,24 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
498 unsigned i; 467 unsigned i;
499 if (parser->ctx) 468 if (parser->ctx)
500 amdgpu_ctx_put(parser->ctx); 469 amdgpu_ctx_put(parser->ctx);
501 if (parser->bo_list) { 470 if (parser->bo_list)
502 if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr) 471 amdgpu_bo_list_put(parser->bo_list);
503 amdgpu_bo_list_free(parser->bo_list); 472
504 else
505 amdgpu_bo_list_put(parser->bo_list);
506 }
507 drm_free_large(parser->vm_bos); 473 drm_free_large(parser->vm_bos);
508 for (i = 0; i < parser->nchunks; i++) 474 for (i = 0; i < parser->nchunks; i++)
509 drm_free_large(parser->chunks[i].kdata); 475 drm_free_large(parser->chunks[i].kdata);
510 kfree(parser->chunks); 476 kfree(parser->chunks);
511 if (parser->ibs)
512 for (i = 0; i < parser->num_ibs; i++)
513 amdgpu_ib_free(parser->adev, &parser->ibs[i]);
514 kfree(parser->ibs);
515 if (parser->uf.bo)
516 drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
517
518 if (!amdgpu_enable_scheduler) 477 if (!amdgpu_enable_scheduler)
519 kfree(parser); 478 {
479 if (parser->ibs)
480 for (i = 0; i < parser->num_ibs; i++)
481 amdgpu_ib_free(parser->adev, &parser->ibs[i]);
482 kfree(parser->ibs);
483 if (parser->uf.bo)
484 drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
485 }
486
487 kfree(parser);
520} 488}
521 489
522/** 490/**
@@ -533,12 +501,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
533 amdgpu_cs_parser_fini_late(parser); 501 amdgpu_cs_parser_fini_late(parser);
534} 502}
535 503
536static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job)
537{
538 amdgpu_cs_parser_fini_late(sched_job);
539 return 0;
540}
541
542static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, 504static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
543 struct amdgpu_vm *vm) 505 struct amdgpu_vm *vm)
544{ 506{
@@ -810,68 +772,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
810 return 0; 772 return 0;
811} 773}
812 774
813static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) 775static int amdgpu_cs_free_job(struct amdgpu_job *sched_job)
814{ 776{
815 int r, i; 777 int i;
816 struct amdgpu_cs_parser *parser = sched_job; 778 if (sched_job->ibs)
817 struct amdgpu_device *adev = sched_job->adev; 779 for (i = 0; i < sched_job->num_ibs; i++)
818 bool reserved_buffers = false; 780 amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
819 781 kfree(sched_job->ibs);
820 r = amdgpu_cs_parser_relocs(parser); 782 if (sched_job->uf.bo)
821 if (r) { 783 drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base);
822 if (r != -ERESTARTSYS) { 784 return 0;
823 if (r == -ENOMEM)
824 DRM_ERROR("Not enough memory for command submission!\n");
825 else
826 DRM_ERROR("Failed to process the buffer list %d!\n", r);
827 }
828 }
829
830 if (!r) {
831 reserved_buffers = true;
832 r = amdgpu_cs_ib_fill(adev, parser);
833 }
834 if (!r) {
835 r = amdgpu_cs_dependencies(adev, parser);
836 if (r)
837 DRM_ERROR("Failed in the dependencies handling %d!\n", r);
838 }
839 if (r) {
840 amdgpu_cs_parser_fini(parser, r, reserved_buffers);
841 return r;
842 }
843
844 for (i = 0; i < parser->num_ibs; i++)
845 trace_amdgpu_cs(parser, i);
846
847 r = amdgpu_cs_ib_vm_chunk(adev, parser);
848 return r;
849}
850
851static struct amdgpu_ring *amdgpu_cs_parser_get_ring(
852 struct amdgpu_device *adev,
853 struct amdgpu_cs_parser *parser)
854{
855 int i, r;
856
857 struct amdgpu_cs_chunk *chunk;
858 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
859 struct amdgpu_ring *ring;
860 for (i = 0; i < parser->nchunks; i++) {
861 chunk = &parser->chunks[i];
862 chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
863
864 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
865 continue;
866
867 r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
868 chunk_ib->ip_instance, chunk_ib->ring,
869 &ring);
870 if (r)
871 return NULL;
872 break;
873 }
874 return ring;
875} 785}
876 786
877int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 787int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -879,7 +789,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
879 struct amdgpu_device *adev = dev->dev_private; 789 struct amdgpu_device *adev = dev->dev_private;
880 union drm_amdgpu_cs *cs = data; 790 union drm_amdgpu_cs *cs = data;
881 struct amdgpu_cs_parser *parser; 791 struct amdgpu_cs_parser *parser;
882 int r; 792 bool reserved_buffers = false;
793 int i, r;
883 794
884 down_read(&adev->exclusive_lock); 795 down_read(&adev->exclusive_lock);
885 if (!adev->accel_working) { 796 if (!adev->accel_working) {
@@ -899,44 +810,79 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
899 return r; 810 return r;
900 } 811 }
901 812
902 if (amdgpu_enable_scheduler && parser->num_ibs) { 813 r = amdgpu_cs_parser_relocs(parser);
903 struct amdgpu_ring * ring = 814 if (r == -ENOMEM)
904 amdgpu_cs_parser_get_ring(adev, parser); 815 DRM_ERROR("Not enough memory for command submission!\n");
905 r = amdgpu_cs_parser_prepare_job(parser); 816 else if (r && r != -ERESTARTSYS)
817 DRM_ERROR("Failed to process the buffer list %d!\n", r);
818 else if (!r) {
819 reserved_buffers = true;
820 r = amdgpu_cs_ib_fill(adev, parser);
821 }
822
823 if (!r) {
824 r = amdgpu_cs_dependencies(adev, parser);
906 if (r) 825 if (r)
907 goto out; 826 DRM_ERROR("Failed in the dependencies handling %d!\n", r);
908 parser->ring = ring; 827 }
909 parser->free_job = amdgpu_cs_parser_free_job; 828
910 mutex_lock(&parser->job_lock); 829 if (r)
911 r = amd_sched_push_job(ring->scheduler, 830 goto out;
912 &parser->ctx->rings[ring->idx].entity, 831
913 parser, 832 for (i = 0; i < parser->num_ibs; i++)
914 &parser->s_fence); 833 trace_amdgpu_cs(parser, i);
834
835 r = amdgpu_cs_ib_vm_chunk(adev, parser);
836 if (r)
837 goto out;
838
839 if (amdgpu_enable_scheduler && parser->num_ibs) {
840 struct amdgpu_job *job;
841 struct amdgpu_ring * ring = parser->ibs->ring;
842 job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
843 if (!job)
844 return -ENOMEM;
845 job->base.sched = ring->scheduler;
846 job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
847 job->adev = parser->adev;
848 job->ibs = parser->ibs;
849 job->num_ibs = parser->num_ibs;
850 job->base.owner = parser->filp;
851 mutex_init(&job->job_lock);
852 if (job->ibs[job->num_ibs - 1].user) {
853 memcpy(&job->uf, &parser->uf,
854 sizeof(struct amdgpu_user_fence));
855 job->ibs[job->num_ibs - 1].user = &job->uf;
856 }
857
858 job->free_job = amdgpu_cs_free_job;
859 mutex_lock(&job->job_lock);
860 r = amd_sched_entity_push_job((struct amd_sched_job *)job);
915 if (r) { 861 if (r) {
916 mutex_unlock(&parser->job_lock); 862 mutex_unlock(&job->job_lock);
863 amdgpu_cs_free_job(job);
864 kfree(job);
917 goto out; 865 goto out;
918 } 866 }
919 parser->ibs[parser->num_ibs - 1].sequence = 867 cs->out.handle =
920 amdgpu_ctx_add_fence(parser->ctx, ring, 868 amdgpu_ctx_add_fence(parser->ctx, ring,
921 &parser->s_fence->base, 869 &job->base.s_fence->base);
922 parser->s_fence->v_seq); 870 parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
923 cs->out.handle = parser->s_fence->v_seq; 871
924 list_sort(NULL, &parser->validated, cmp_size_smaller_first); 872 list_sort(NULL, &parser->validated, cmp_size_smaller_first);
925 ttm_eu_fence_buffer_objects(&parser->ticket, 873 ttm_eu_fence_buffer_objects(&parser->ticket,
926 &parser->validated, 874 &parser->validated,
927 &parser->s_fence->base); 875 &job->base.s_fence->base);
928 876
929 mutex_unlock(&parser->job_lock); 877 mutex_unlock(&job->job_lock);
878 amdgpu_cs_parser_fini_late(parser);
930 up_read(&adev->exclusive_lock); 879 up_read(&adev->exclusive_lock);
931 return 0; 880 return 0;
932 } 881 }
933 r = amdgpu_cs_parser_prepare_job(parser);
934 if (r)
935 goto out;
936 882
937 cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; 883 cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
938out: 884out:
939 amdgpu_cs_parser_fini(parser, r, true); 885 amdgpu_cs_parser_fini(parser, r, reserved_buffers);
940 up_read(&adev->exclusive_lock); 886 up_read(&adev->exclusive_lock);
941 r = amdgpu_cs_handle_lockup(adev, r); 887 r = amdgpu_cs_handle_lockup(adev, r);
942 return r; 888 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 08bc7722ddb8..20cbc4eb5a6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -229,17 +229,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
229} 229}
230 230
231uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, 231uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
232 struct fence *fence, uint64_t queued_seq) 232 struct fence *fence)
233{ 233{
234 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; 234 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
235 uint64_t seq = 0; 235 uint64_t seq = cring->sequence;
236 unsigned idx = 0; 236 unsigned idx = 0;
237 struct fence *other = NULL; 237 struct fence *other = NULL;
238 238
239 if (amdgpu_enable_scheduler)
240 seq = queued_seq;
241 else
242 seq = cring->sequence;
243 idx = seq % AMDGPU_CTX_MAX_CS_PENDING; 239 idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
244 other = cring->fences[idx]; 240 other = cring->fences[idx];
245 if (other) { 241 if (other) {
@@ -253,8 +249,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
253 249
254 spin_lock(&ctx->ring_lock); 250 spin_lock(&ctx->ring_lock);
255 cring->fences[idx] = fence; 251 cring->fences[idx] = fence;
256 if (!amdgpu_enable_scheduler) 252 cring->sequence++;
257 cring->sequence++;
258 spin_unlock(&ctx->ring_lock); 253 spin_unlock(&ctx->ring_lock);
259 254
260 fence_put(other); 255 fence_put(other);
@@ -267,21 +262,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
267{ 262{
268 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; 263 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
269 struct fence *fence; 264 struct fence *fence;
270 uint64_t queued_seq;
271 265
272 spin_lock(&ctx->ring_lock); 266 spin_lock(&ctx->ring_lock);
273 if (amdgpu_enable_scheduler)
274 queued_seq = amd_sched_next_queued_seq(&cring->entity);
275 else
276 queued_seq = cring->sequence;
277 267
278 if (seq >= queued_seq) { 268 if (seq >= cring->sequence) {
279 spin_unlock(&ctx->ring_lock); 269 spin_unlock(&ctx->ring_lock);
280 return ERR_PTR(-EINVAL); 270 return ERR_PTR(-EINVAL);
281 } 271 }
282 272
283 273
284 if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) { 274 if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) {
285 spin_unlock(&ctx->ring_lock); 275 spin_unlock(&ctx->ring_lock);
286 return NULL; 276 return NULL;
287 } 277 }
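
The context fence bookkeeping above stores the last AMDGPU_CTX_MAX_CS_PENDING fences of each
ring in a small ring buffer indexed by seq % AMDGPU_CTX_MAX_CS_PENDING; with the scheduler's
private sequence numbers gone, cring->sequence is again the single source of truth. The
validity window enforced by amdgpu_ctx_get_fence() can be summarized as (sketch, reusing the
names from the hunk above; only the two checks are shown in the patch):

    if (seq >= cring->sequence)
            return ERR_PTR(-EINVAL);      /* that submission does not exist yet */
    if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence)
            return NULL;                  /* too old: its slot has been recycled */
    /* otherwise the fence lives in cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING] */
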
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e6fa27805207..0fcc0bd1622c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -49,9 +49,10 @@
49/* 49/*
50 * KMS wrapper. 50 * KMS wrapper.
51 * - 3.0.0 - initial driver 51 * - 3.0.0 - initial driver
52 * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
52 */ 53 */
53#define KMS_DRIVER_MAJOR 3 54#define KMS_DRIVER_MAJOR 3
54#define KMS_DRIVER_MINOR 0 55#define KMS_DRIVER_MINOR 1
55#define KMS_DRIVER_PATCHLEVEL 0 56#define KMS_DRIVER_PATCHLEVEL 0
56 57
57int amdgpu_vram_limit = 0; 58int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 98500f1756f7..f446bf2fedc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -626,10 +626,10 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
626 ring->fence_drv.ring = ring; 626 ring->fence_drv.ring = ring;
627 627
628 if (amdgpu_enable_scheduler) { 628 if (amdgpu_enable_scheduler) {
629 ring->scheduler = amd_sched_create((void *)ring->adev, 629 ring->scheduler = amd_sched_create(&amdgpu_sched_ops,
630 &amdgpu_sched_ops, 630 ring->idx,
631 ring->idx, 5, 0, 631 amdgpu_sched_hw_submission,
632 amdgpu_sched_hw_submission); 632 (void *)ring->adev);
633 if (!ring->scheduler) 633 if (!ring->scheduler)
634 DRM_ERROR("Failed to create scheduler on ring %d.\n", 634 DRM_ERROR("Failed to create scheduler on ring %d.\n",
635 ring->idx); 635 ring->idx);
@@ -836,22 +836,37 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
836 return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); 836 return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
837} 837}
838 838
839static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences) 839static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
840{ 840{
841 int idx; 841 int idx;
842 struct amdgpu_fence *fence; 842 struct fence *fence;
843 843
844 idx = 0; 844 for (idx = 0; idx < count; ++idx) {
845 for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
846 fence = fences[idx]; 845 fence = fences[idx];
847 if (fence) { 846 if (fence) {
848 if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) 847 if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
849 return true; 848 return true;
850 } 849 }
851 } 850 }
852 return false; 851 return false;
853} 852}
854 853
854static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count)
855{
856 int idx;
857 struct fence *fence;
858
859 for (idx = 0; idx < count; ++idx) {
860 fence = fences[idx];
861 if (fence) {
862 if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
863 return false;
864 }
865 }
866
867 return true;
868}
869
855struct amdgpu_wait_cb { 870struct amdgpu_wait_cb {
856 struct fence_cb base; 871 struct fence_cb base;
857 struct task_struct *task; 872 struct task_struct *task;
@@ -867,33 +882,56 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
867static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, 882static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
868 signed long t) 883 signed long t)
869{ 884{
870 struct amdgpu_fence *array[AMDGPU_MAX_RINGS];
871 struct amdgpu_fence *fence = to_amdgpu_fence(f); 885 struct amdgpu_fence *fence = to_amdgpu_fence(f);
872 struct amdgpu_device *adev = fence->ring->adev; 886 struct amdgpu_device *adev = fence->ring->adev;
873 887
874 memset(&array[0], 0, sizeof(array)); 888 return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t);
875 array[0] = fence;
876
877 return amdgpu_fence_wait_any(adev, array, intr, t);
878} 889}
879 890
880/* wait until any fence in array signaled */ 891/**
881signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, 892 * Wait the fence array with timeout
882 struct amdgpu_fence **array, bool intr, signed long t) 893 *
894 * @adev: amdgpu device
895 * @array: the fence array with amdgpu fence pointer
896 * @count: the number of the fence array
897 * @wait_all: the flag of wait all(true) or wait any(false)
898 * @intr: when sleep, set the current task interruptable or not
899 * @t: timeout to wait
900 *
901 * If wait_all is true, it will return when all fences are signaled or timeout.
902 * If wait_all is false, it will return when any fence is signaled or timeout.
903 */
904signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
905 struct fence **array,
906 uint32_t count,
907 bool wait_all,
908 bool intr,
909 signed long t)
883{ 910{
884 long idx = 0; 911 long idx = 0;
885 struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS]; 912 struct amdgpu_wait_cb *cb;
886 struct amdgpu_fence *fence; 913 struct fence *fence;
887 914
888 BUG_ON(!array); 915 BUG_ON(!array);
889 916
890 for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { 917 cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
918 if (cb == NULL) {
919 t = -ENOMEM;
920 goto err_free_cb;
921 }
922
923 for (idx = 0; idx < count; ++idx) {
891 fence = array[idx]; 924 fence = array[idx];
892 if (fence) { 925 if (fence) {
893 cb[idx].task = current; 926 cb[idx].task = current;
894 if (fence_add_callback(&fence->base, 927 if (fence_add_callback(fence,
895 &cb[idx].base, amdgpu_fence_wait_cb)) 928 &cb[idx].base, amdgpu_fence_wait_cb)) {
896 return t; /* return if fence is already signaled */ 929 /* The fence is already signaled */
930 if (wait_all)
931 continue;
932 else
933 goto fence_rm_cb;
934 }
897 } 935 }
898 } 936 }
899 937
@@ -907,7 +945,9 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
907 * amdgpu_test_signaled_any must be called after 945 * amdgpu_test_signaled_any must be called after
908 * set_current_state to prevent a race with wake_up_process 946 * set_current_state to prevent a race with wake_up_process
909 */ 947 */
910 if (amdgpu_test_signaled_any(array)) 948 if (!wait_all && amdgpu_test_signaled_any(array, count))
949 break;
950 if (wait_all && amdgpu_test_signaled_all(array, count))
911 break; 951 break;
912 952
913 if (adev->needs_reset) { 953 if (adev->needs_reset) {
@@ -923,13 +963,16 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
923 963
924 __set_current_state(TASK_RUNNING); 964 __set_current_state(TASK_RUNNING);
925 965
926 idx = 0; 966fence_rm_cb:
927 for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { 967 for (idx = 0; idx < count; ++idx) {
928 fence = array[idx]; 968 fence = array[idx];
929 if (fence) 969 if (fence && cb[idx].base.func)
930 fence_remove_callback(&fence->base, &cb[idx].base); 970 fence_remove_callback(fence, &cb[idx].base);
931 } 971 }
932 972
973err_free_cb:
974 kfree(cb);
975
933 return t; 976 return t;
934} 977}
935 978
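
A short usage sketch for the new wait helper (the device pointer and the fences are
placeholders; only the call itself follows this patch). As in amdgpu_sa_bo_new() below, the
return value is the remaining timeout in jiffies on success, 0 on timeout, or a negative
error code:

    struct fence *fences[2] = { fence_a, fence_b };   /* placeholder fences */
    signed long t;
    int r;

    /* Wait until *all* fences signal, interruptible, 100 ms timeout. */
    t = amdgpu_fence_wait_multiple(adev, fences, 2, true, true,
                                   msecs_to_jiffies(100));
    if (t == 0)
            r = -ETIMEDOUT;   /* timed out */
    else if (t < 0)
            r = t;            /* interrupted or out of memory */
    else
            r = 0;            /* all fences signaled */

    /* Wait until *any* fence signals, uninterruptible, no timeout. */
    t = amdgpu_fence_wait_multiple(adev, fences, 2, false, false,
                                   MAX_SCHEDULE_TIMEOUT);
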
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 5104e64e9ad8..c439735ee670 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
73 73
74 if (!vm) 74 if (!vm)
75 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); 75 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
76 else
77 ib->gpu_addr = 0;
78
79 } else {
80 ib->sa_bo = NULL;
81 ib->ptr = NULL;
82 ib->gpu_addr = 0;
83 } 76 }
84 77
85 amdgpu_sync_create(&ib->sync); 78 amdgpu_sync_create(&ib->sync);
86 79
87 ib->ring = ring; 80 ib->ring = ring;
88 ib->fence = NULL;
89 ib->user = NULL;
90 ib->vm = vm; 81 ib->vm = vm;
91 ib->ctx = NULL;
92 ib->gds_base = 0;
93 ib->gds_size = 0;
94 ib->gws_base = 0;
95 ib->gws_size = 0;
96 ib->oa_base = 0;
97 ib->oa_size = 0;
98 ib->flags = 0;
99 82
100 return 0; 83 return 0;
101} 84}
@@ -110,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
110 */ 93 */
111void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) 94void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
112{ 95{
113 amdgpu_sync_free(adev, &ib->sync, ib->fence); 96 amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
114 amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); 97 amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
115 amdgpu_fence_unref(&ib->fence); 98 amdgpu_fence_unref(&ib->fence);
116} 99}
117 100
@@ -143,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
143 struct amdgpu_ring *ring; 126 struct amdgpu_ring *ring;
144 struct amdgpu_ctx *ctx, *old_ctx; 127 struct amdgpu_ctx *ctx, *old_ctx;
145 struct amdgpu_vm *vm; 128 struct amdgpu_vm *vm;
146 uint64_t sequence;
147 unsigned i; 129 unsigned i;
148 int r = 0; 130 int r = 0;
149 131
@@ -158,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
158 dev_err(adev->dev, "couldn't schedule ib\n"); 140 dev_err(adev->dev, "couldn't schedule ib\n");
159 return -EINVAL; 141 return -EINVAL;
160 } 142 }
161 143 r = amdgpu_sync_wait(&ibs->sync);
144 if (r) {
145 dev_err(adev->dev, "IB sync failed (%d).\n", r);
146 return r;
147 }
162 r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); 148 r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
163 if (r) { 149 if (r) {
164 dev_err(adev->dev, "scheduling IB failed (%d).\n", r); 150 dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
@@ -216,12 +202,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
216 return r; 202 return r;
217 } 203 }
218 204
219 sequence = amdgpu_enable_scheduler ? ib->sequence : 0;
220
221 if (!amdgpu_enable_scheduler && ib->ctx) 205 if (!amdgpu_enable_scheduler && ib->ctx)
222 ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, 206 ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
223 &ib->fence->base, 207 &ib->fence->base);
224 sequence);
225 208
226 /* wrap the last IB with fence */ 209 /* wrap the last IB with fence */
227 if (ib->user) { 210 if (ib->user) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 90044b254404..5c8a803acedc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -98,18 +98,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
98 /* add 8 bytes for the rptr/wptr shadows and 98 /* add 8 bytes for the rptr/wptr shadows and
99 * add them to the end of the ring allocation. 99 * add them to the end of the ring allocation.
100 */ 100 */
101 adev->irq.ih.ring = kzalloc(adev->irq.ih.ring_size + 8, GFP_KERNEL); 101 adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
102 adev->irq.ih.ring_size + 8,
103 &adev->irq.ih.rb_dma_addr);
102 if (adev->irq.ih.ring == NULL) 104 if (adev->irq.ih.ring == NULL)
103 return -ENOMEM; 105 return -ENOMEM;
104 adev->irq.ih.rb_dma_addr = pci_map_single(adev->pdev, 106 memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
105 (void *)adev->irq.ih.ring,
106 adev->irq.ih.ring_size,
107 PCI_DMA_BIDIRECTIONAL);
108 if (pci_dma_mapping_error(adev->pdev, adev->irq.ih.rb_dma_addr)) {
109 dev_err(&adev->pdev->dev, "Failed to DMA MAP the IH RB page\n");
110 kfree((void *)adev->irq.ih.ring);
111 return -ENOMEM;
112 }
113 adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; 107 adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
114 adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; 108 adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
115 } 109 }
@@ -149,9 +143,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
149 /* add 8 bytes for the rptr/wptr shadows and 143 /* add 8 bytes for the rptr/wptr shadows and
150 * add them to the end of the ring allocation. 144 * add them to the end of the ring allocation.
151 */ 145 */
152 pci_unmap_single(adev->pdev, adev->irq.ih.rb_dma_addr, 146 pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
153 adev->irq.ih.ring_size + 8, PCI_DMA_BIDIRECTIONAL); 147 (void *)adev->irq.ih.ring,
154 kfree((void *)adev->irq.ih.ring); 148 adev->irq.ih.rb_dma_addr);
155 adev->irq.ih.ring = NULL; 149 adev->irq.ih.ring = NULL;
156 } 150 }
157 } else { 151 } else {
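
The IH ring buffer is now a single coherent DMA allocation instead of kzalloc() memory mapped
with pci_map_single(), so the CPU always sees the ring contents written back by the IH block
without explicit cache maintenance. The allocation/teardown pairing from the hunk above,
condensed (ih is shorthand for &adev->irq.ih):

    /* Allocate: CPU pointer returned, bus address written to rb_dma_addr. */
    ih->ring = pci_alloc_consistent(adev->pdev, ih->ring_size + 8,
                                    &ih->rb_dma_addr);
    if (ih->ring == NULL)
            return -ENOMEM;
    memset((void *)ih->ring, 0, ih->ring_size + 8);

    /* Free: size, CPU pointer and bus address must match the allocation. */
    pci_free_consistent(adev->pdev, ih->ring_size + 8,
                        (void *)ih->ring, ih->rb_dma_addr);
    ih->ring = NULL;
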
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 87da6b1848fd..22367939ebf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -560,6 +560,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
560 if (!fpriv) 560 if (!fpriv)
561 return; 561 return;
562 562
563 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
564
563 amdgpu_vm_fini(adev, &fpriv->vm); 565 amdgpu_vm_fini(adev, &fpriv->vm);
564 566
565 idr_for_each_entry(&fpriv->bo_list_handles, list, handle) 567 idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
@@ -568,8 +570,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
568 idr_destroy(&fpriv->bo_list_handles); 570 idr_destroy(&fpriv->bo_list_handles);
569 mutex_destroy(&fpriv->bo_list_lock); 571 mutex_destroy(&fpriv->bo_list_lock);
570 572
571 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
572
573 kfree(fpriv); 573 kfree(fpriv);
574 file_priv->driver_priv = NULL; 574 file_priv->driver_priv = NULL;
575} 575}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 238465a9ac55..6ea18dcec561 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -193,7 +193,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
193 unsigned size, unsigned align); 193 unsigned size, unsigned align);
194void amdgpu_sa_bo_free(struct amdgpu_device *adev, 194void amdgpu_sa_bo_free(struct amdgpu_device *adev,
195 struct amdgpu_sa_bo **sa_bo, 195 struct amdgpu_sa_bo **sa_bo,
196 struct amdgpu_fence *fence); 196 struct fence *fence);
197#if defined(CONFIG_DEBUG_FS) 197#if defined(CONFIG_DEBUG_FS)
198void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, 198void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
199 struct seq_file *m); 199 struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index d6398cf45f24..b92525329d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -139,6 +139,20 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
139 return r; 139 return r;
140} 140}
141 141
142static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
143{
144 struct amdgpu_fence *a_fence;
145 struct amd_sched_fence *s_fence;
146
147 s_fence = to_amd_sched_fence(f);
148 if (s_fence)
149 return s_fence->scheduler->ring_id;
150 a_fence = to_amdgpu_fence(f);
151 if (a_fence)
152 return a_fence->ring->idx;
153 return 0;
154}
155
142static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) 156static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
143{ 157{
144 struct amdgpu_sa_manager *sa_manager = sa_bo->manager; 158 struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
@@ -147,7 +161,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
147 } 161 }
148 list_del_init(&sa_bo->olist); 162 list_del_init(&sa_bo->olist);
149 list_del_init(&sa_bo->flist); 163 list_del_init(&sa_bo->flist);
150 amdgpu_fence_unref(&sa_bo->fence); 164 fence_put(sa_bo->fence);
151 kfree(sa_bo); 165 kfree(sa_bo);
152} 166}
153 167
@@ -161,7 +175,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
161 sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); 175 sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
162 list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { 176 list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
163 if (sa_bo->fence == NULL || 177 if (sa_bo->fence == NULL ||
164 !fence_is_signaled(&sa_bo->fence->base)) { 178 !fence_is_signaled(sa_bo->fence)) {
165 return; 179 return;
166 } 180 }
167 amdgpu_sa_bo_remove_locked(sa_bo); 181 amdgpu_sa_bo_remove_locked(sa_bo);
@@ -246,7 +260,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
246} 260}
247 261
248static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, 262static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
249 struct amdgpu_fence **fences, 263 struct fence **fences,
250 unsigned *tries) 264 unsigned *tries)
251{ 265{
252 struct amdgpu_sa_bo *best_bo = NULL; 266 struct amdgpu_sa_bo *best_bo = NULL;
@@ -275,7 +289,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
275 sa_bo = list_first_entry(&sa_manager->flist[i], 289 sa_bo = list_first_entry(&sa_manager->flist[i],
276 struct amdgpu_sa_bo, flist); 290 struct amdgpu_sa_bo, flist);
277 291
278 if (!fence_is_signaled(&sa_bo->fence->base)) { 292 if (!fence_is_signaled(sa_bo->fence)) {
279 fences[i] = sa_bo->fence; 293 fences[i] = sa_bo->fence;
280 continue; 294 continue;
281 } 295 }
@@ -299,7 +313,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
299 } 313 }
300 314
301 if (best_bo) { 315 if (best_bo) {
302 ++tries[best_bo->fence->ring->idx]; 316 uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence);
317 ++tries[idx];
303 sa_manager->hole = best_bo->olist.prev; 318 sa_manager->hole = best_bo->olist.prev;
304 319
305 /* we knew that this one is signaled, 320 /* we knew that this one is signaled,
@@ -315,7 +330,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
315 struct amdgpu_sa_bo **sa_bo, 330 struct amdgpu_sa_bo **sa_bo,
316 unsigned size, unsigned align) 331 unsigned size, unsigned align)
317{ 332{
318 struct amdgpu_fence *fences[AMDGPU_MAX_RINGS]; 333 struct fence *fences[AMDGPU_MAX_RINGS];
319 unsigned tries[AMDGPU_MAX_RINGS]; 334 unsigned tries[AMDGPU_MAX_RINGS];
320 int i, r; 335 int i, r;
321 signed long t; 336 signed long t;
@@ -352,7 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
352 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); 367 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
353 368
354 spin_unlock(&sa_manager->wq.lock); 369 spin_unlock(&sa_manager->wq.lock);
355 t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT); 370 t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false,
371 MAX_SCHEDULE_TIMEOUT);
356 r = (t > 0) ? 0 : t; 372 r = (t > 0) ? 0 : t;
357 spin_lock(&sa_manager->wq.lock); 373 spin_lock(&sa_manager->wq.lock);
358 /* if we have nothing to wait for block */ 374 /* if we have nothing to wait for block */
@@ -372,7 +388,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
372} 388}
373 389
374void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, 390void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
375 struct amdgpu_fence *fence) 391 struct fence *fence)
376{ 392{
377 struct amdgpu_sa_manager *sa_manager; 393 struct amdgpu_sa_manager *sa_manager;
378 394
@@ -382,10 +398,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
382 398
383 sa_manager = (*sa_bo)->manager; 399 sa_manager = (*sa_bo)->manager;
384 spin_lock(&sa_manager->wq.lock); 400 spin_lock(&sa_manager->wq.lock);
385 if (fence && !fence_is_signaled(&fence->base)) { 401 if (fence && !fence_is_signaled(fence)) {
386 (*sa_bo)->fence = amdgpu_fence_ref(fence); 402 uint32_t idx;
387 list_add_tail(&(*sa_bo)->flist, 403 (*sa_bo)->fence = fence_get(fence);
388 &sa_manager->flist[fence->ring->idx]); 404 idx = amdgpu_sa_get_ring_from_fence(fence);
405 list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
389 } else { 406 } else {
390 amdgpu_sa_bo_remove_locked(*sa_bo); 407 amdgpu_sa_bo_remove_locked(*sa_bo);
391 } 408 }
@@ -412,8 +429,16 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
412 seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", 429 seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
413 soffset, eoffset, eoffset - soffset); 430 soffset, eoffset, eoffset - soffset);
414 if (i->fence) { 431 if (i->fence) {
415 seq_printf(m, " protected by 0x%016llx on ring %d", 432 struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence);
416 i->fence->seq, i->fence->ring->idx); 433 struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence);
434 if (a_fence)
435 seq_printf(m, " protected by 0x%016llx on ring %d",
436 a_fence->seq, a_fence->ring->idx);
437 if (s_fence)
438 seq_printf(m, " protected by 0x%016x on ring %d",
439 s_fence->base.seqno,
440 s_fence->scheduler->ring_id);
441
417 } 442 }
418 seq_printf(m, "\n"); 443 seq_printf(m, "\n");
419 } 444 }
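
Because sub-allocator fences are now plain struct fence pointers, the SA code can see two
concrete fence types and picks the ring from whichever container the fence converts to. The
dispatch pattern of amdgpu_sa_get_ring_from_fence() above, shown in isolation:

    struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
    struct amdgpu_fence *a_fence;

    if (s_fence)
            return s_fence->scheduler->ring_id;   /* fence created by the GPU scheduler */
    a_fence = to_amdgpu_fence(f);
    if (a_fence)
            return a_fence->ring->idx;            /* raw hardware fence */
    return 0;                                     /* unknown type: fall back to ring 0 */
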
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index a86e38158afa..f93fb3541488 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -27,55 +27,28 @@
27#include <drm/drmP.h> 27#include <drm/drmP.h>
28#include "amdgpu.h" 28#include "amdgpu.h"
29 29
30static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, 30static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job)
31 struct amd_sched_entity *entity,
32 struct amd_sched_job *job)
33{ 31{
34 int r = 0; 32 struct amdgpu_job *sched_job;
35 struct amdgpu_cs_parser *sched_job;
36 if (!job || !job->data) {
37 DRM_ERROR("job is null\n");
38 return -EINVAL;
39 }
40
41 sched_job = (struct amdgpu_cs_parser *)job->data;
42 if (sched_job->prepare_job) {
43 r = sched_job->prepare_job(sched_job);
44 if (r) {
45 DRM_ERROR("Prepare job error\n");
46 schedule_work(&sched_job->job_work);
47 }
48 }
49 return r;
50}
51
52static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
53 struct amd_sched_entity *entity,
54 struct amd_sched_job *job)
55{
56 int r = 0;
57 struct amdgpu_cs_parser *sched_job;
58 struct amdgpu_fence *fence; 33 struct amdgpu_fence *fence;
34 int r;
59 35
60 if (!job || !job->data) { 36 if (!job) {
61 DRM_ERROR("job is null\n"); 37 DRM_ERROR("job is null\n");
62 return NULL; 38 return NULL;
63 } 39 }
64 sched_job = (struct amdgpu_cs_parser *)job->data; 40 sched_job = (struct amdgpu_job *)job;
65 mutex_lock(&sched_job->job_lock); 41 mutex_lock(&sched_job->job_lock);
66 r = amdgpu_ib_schedule(sched_job->adev, 42 r = amdgpu_ib_schedule(sched_job->adev,
67 sched_job->num_ibs, 43 sched_job->num_ibs,
68 sched_job->ibs, 44 sched_job->ibs,
69 sched_job->filp); 45 sched_job->base.owner);
70 if (r) 46 if (r)
71 goto err; 47 goto err;
72 fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); 48 fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence);
73 49
74 if (sched_job->run_job) { 50 if (sched_job->free_job)
75 r = sched_job->run_job(sched_job); 51 sched_job->free_job(sched_job);
76 if (r)
77 goto err;
78 }
79 52
80 mutex_unlock(&sched_job->job_lock); 53 mutex_unlock(&sched_job->job_lock);
81 return &fence->base; 54 return &fence->base;
@@ -83,25 +56,25 @@ static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
83err: 56err:
84 DRM_ERROR("Run job error\n"); 57 DRM_ERROR("Run job error\n");
85 mutex_unlock(&sched_job->job_lock); 58 mutex_unlock(&sched_job->job_lock);
86 schedule_work(&sched_job->job_work); 59 job->sched->ops->process_job(job);
87 return NULL; 60 return NULL;
88} 61}
89 62
90static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, 63static void amdgpu_sched_process_job(struct amd_sched_job *job)
91 struct amd_sched_job *job)
92{ 64{
93 struct amdgpu_cs_parser *sched_job; 65 struct amdgpu_job *sched_job;
94 66
95 if (!job || !job->data) { 67 if (!job) {
96 DRM_ERROR("job is null\n"); 68 DRM_ERROR("job is null\n");
97 return; 69 return;
98 } 70 }
99 sched_job = (struct amdgpu_cs_parser *)job->data; 71 sched_job = (struct amdgpu_job *)job;
100 schedule_work(&sched_job->job_work); 72 /* after processing job, free memory */
73 fence_put(&sched_job->base.s_fence->base);
74 kfree(sched_job);
101} 75}
102 76
103struct amd_sched_backend_ops amdgpu_sched_ops = { 77struct amd_sched_backend_ops amdgpu_sched_ops = {
104 .prepare_job = amdgpu_sched_prepare_job,
105 .run_job = amdgpu_sched_run_job, 78 .run_job = amdgpu_sched_run_job,
106 .process_job = amdgpu_sched_process_job 79 .process_job = amdgpu_sched_process_job
107}; 80};
@@ -110,36 +83,39 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
110 struct amdgpu_ring *ring, 83 struct amdgpu_ring *ring,
111 struct amdgpu_ib *ibs, 84 struct amdgpu_ib *ibs,
112 unsigned num_ibs, 85 unsigned num_ibs,
113 int (*free_job)(struct amdgpu_cs_parser *), 86 int (*free_job)(struct amdgpu_job *),
114 void *owner, 87 void *owner,
115 struct fence **f) 88 struct fence **f)
116{ 89{
117 int r = 0; 90 int r = 0;
118 if (amdgpu_enable_scheduler) { 91 if (amdgpu_enable_scheduler) {
119 struct amdgpu_cs_parser *sched_job = 92 struct amdgpu_job *job =
120 amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, 93 kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
121 ibs, num_ibs); 94 if (!job)
122 if(!sched_job) {
123 return -ENOMEM; 95 return -ENOMEM;
124 } 96 job->base.sched = ring->scheduler;
125 sched_job->free_job = free_job; 97 job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
126 mutex_lock(&sched_job->job_lock); 98 job->adev = adev;
127 r = amd_sched_push_job(ring->scheduler, 99 job->ibs = ibs;
128 &adev->kernel_ctx.rings[ring->idx].entity, 100 job->num_ibs = num_ibs;
129 sched_job, &sched_job->s_fence); 101 job->base.owner = owner;
102 mutex_init(&job->job_lock);
103 job->free_job = free_job;
104 mutex_lock(&job->job_lock);
105 r = amd_sched_entity_push_job((struct amd_sched_job *)job);
130 if (r) { 106 if (r) {
131 mutex_unlock(&sched_job->job_lock); 107 mutex_unlock(&job->job_lock);
132 kfree(sched_job); 108 kfree(job);
133 return r; 109 return r;
134 } 110 }
135 ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; 111 *f = fence_get(&job->base.s_fence->base);
136 *f = fence_get(&sched_job->s_fence->base); 112 mutex_unlock(&job->job_lock);
137 mutex_unlock(&sched_job->job_lock);
138 } else { 113 } else {
139 r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); 114 r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
140 if (r) 115 if (r)
141 return r; 116 return r;
142 *f = fence_get(&ibs[num_ibs - 1].fence->base); 117 *f = fence_get(&ibs[num_ibs - 1].fence->base);
143 } 118 }
119
144 return 0; 120 return 0;
145} 121}
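
As a quick illustration of the pattern this hunk introduces: the backend callbacks now receive only a struct amd_sched_job *, and the driver recovers its own job with a plain cast, which presumably works because struct amdgpu_job embeds the scheduler job as its first member (the casts in both directions above rely on that layout). A minimal stand-alone sketch of that embed-and-downcast shape follows; every name in it (base_job, driver_job, backend_ops) is illustrative, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for amd_sched_job / amdgpu_job (illustrative names). */
struct base_job {
        const struct backend_ops *ops;
        void *owner;
};

struct driver_job {
        struct base_job base;   /* first member, so the downcast below is valid */
        int num_ibs;
};

struct backend_ops {
        int  (*run_job)(struct base_job *job);
        void (*process_job)(struct base_job *job);
};

static int my_run_job(struct base_job *job)
{
        /* Recover the driver job from the embedded base, as the patch does. */
        struct driver_job *djob = (struct driver_job *)job;
        printf("running job with %d IBs\n", djob->num_ibs);
        return 0;
}

static void my_process_job(struct base_job *job)
{
        /* Completion path owns the job memory once it has been processed. */
        free((struct driver_job *)job);
}

static const struct backend_ops ops = {
        .run_job = my_run_job,
        .process_job = my_process_job,
};

int main(void)
{
        struct driver_job *djob = calloc(1, sizeof(*djob));
        if (!djob)
                return 1;
        djob->base.ops = &ops;
        djob->num_ibs = 2;
        djob->base.ops->run_job(&djob->base);
        djob->base.ops->process_job(&djob->base);
        return 0;
}
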
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index d6d41a42ab65..ff3ca52ec6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
87 87
88void amdgpu_semaphore_free(struct amdgpu_device *adev, 88void amdgpu_semaphore_free(struct amdgpu_device *adev,
89 struct amdgpu_semaphore **semaphore, 89 struct amdgpu_semaphore **semaphore,
90 struct amdgpu_fence *fence) 90 struct fence *fence)
91{ 91{
92 if (semaphore == NULL || *semaphore == NULL) { 92 if (semaphore == NULL || *semaphore == NULL) {
93 return; 93 return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 7cb711fc1ee2..4fffb2539331 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -32,6 +32,11 @@
32#include "amdgpu.h" 32#include "amdgpu.h"
33#include "amdgpu_trace.h" 33#include "amdgpu_trace.h"
34 34
35struct amdgpu_sync_entry {
36 struct hlist_node node;
37 struct fence *fence;
38};
39
35/** 40/**
36 * amdgpu_sync_create - zero init sync object 41 * amdgpu_sync_create - zero init sync object
37 * 42 *
@@ -49,9 +54,33 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
49 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 54 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
50 sync->sync_to[i] = NULL; 55 sync->sync_to[i] = NULL;
51 56
57 hash_init(sync->fences);
52 sync->last_vm_update = NULL; 58 sync->last_vm_update = NULL;
53} 59}
54 60
61static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
62{
63 struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
64 struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
65
66 if (a_fence)
67 return a_fence->ring->adev == adev;
68 if (s_fence)
69 return (struct amdgpu_device *)s_fence->scheduler->priv == adev;
70 return false;
71}
72
73static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
74{
75 struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
76 struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
77 if (s_fence)
78 return s_fence->owner == owner;
79 if (a_fence)
80 return a_fence->owner == owner;
81 return false;
82}
83
55/** 84/**
56 * amdgpu_sync_fence - remember to sync to this fence 85 * amdgpu_sync_fence - remember to sync to this fence
57 * 86 *
@@ -62,28 +91,54 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
62int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, 91int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
63 struct fence *f) 92 struct fence *f)
64{ 93{
94 struct amdgpu_sync_entry *e;
65 struct amdgpu_fence *fence; 95 struct amdgpu_fence *fence;
66 struct amdgpu_fence *other; 96 struct amdgpu_fence *other;
97 struct fence *tmp, *later;
67 98
68 if (!f) 99 if (!f)
69 return 0; 100 return 0;
70 101
102 if (amdgpu_sync_same_dev(adev, f) &&
103 amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) {
104 if (sync->last_vm_update) {
105 tmp = sync->last_vm_update;
106 BUG_ON(f->context != tmp->context);
107 later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
108 sync->last_vm_update = fence_get(later);
109 fence_put(tmp);
110 } else
111 sync->last_vm_update = fence_get(f);
112 }
113
71 fence = to_amdgpu_fence(f); 114 fence = to_amdgpu_fence(f);
72 if (!fence || fence->ring->adev != adev) 115 if (!fence || fence->ring->adev != adev) {
73 return fence_wait(f, true); 116 hash_for_each_possible(sync->fences, e, node, f->context) {
117 struct fence *new;
118 if (unlikely(e->fence->context != f->context))
119 continue;
120 new = fence_get(fence_later(e->fence, f));
121 if (new) {
122 fence_put(e->fence);
123 e->fence = new;
124 }
125 return 0;
126 }
127
128 e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
129 if (!e)
130 return -ENOMEM;
131
132 hash_add(sync->fences, &e->node, f->context);
133 e->fence = fence_get(f);
134 return 0;
135 }
74 136
75 other = sync->sync_to[fence->ring->idx]; 137 other = sync->sync_to[fence->ring->idx];
76 sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( 138 sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
77 amdgpu_fence_later(fence, other)); 139 amdgpu_fence_later(fence, other));
78 amdgpu_fence_unref(&other); 140 amdgpu_fence_unref(&other);
79 141
80 if (fence->owner == AMDGPU_FENCE_OWNER_VM) {
81 other = sync->last_vm_update;
82 sync->last_vm_update = amdgpu_fence_ref(
83 amdgpu_fence_later(fence, other));
84 amdgpu_fence_unref(&other);
85 }
86
87 return 0; 142 return 0;
88} 143}
89 144
@@ -147,6 +202,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
147 return r; 202 return r;
148} 203}
149 204
205int amdgpu_sync_wait(struct amdgpu_sync *sync)
206{
207 struct amdgpu_sync_entry *e;
208 struct hlist_node *tmp;
209 int i, r;
210
211 hash_for_each_safe(sync->fences, i, tmp, e, node) {
212 r = fence_wait(e->fence, false);
213 if (r)
214 return r;
215
216 hash_del(&e->node);
217 fence_put(e->fence);
218 kfree(e);
219 }
220 return 0;
221}
222
150/** 223/**
151 * amdgpu_sync_rings - sync ring to all registered fences 224 * amdgpu_sync_rings - sync ring to all registered fences
152 * 225 *
@@ -234,15 +307,23 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
234 */ 307 */
235void amdgpu_sync_free(struct amdgpu_device *adev, 308void amdgpu_sync_free(struct amdgpu_device *adev,
236 struct amdgpu_sync *sync, 309 struct amdgpu_sync *sync,
237 struct amdgpu_fence *fence) 310 struct fence *fence)
238{ 311{
312 struct amdgpu_sync_entry *e;
313 struct hlist_node *tmp;
239 unsigned i; 314 unsigned i;
240 315
316 hash_for_each_safe(sync->fences, i, tmp, e, node) {
317 hash_del(&e->node);
318 fence_put(e->fence);
319 kfree(e);
320 }
321
241 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) 322 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
242 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); 323 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
243 324
244 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 325 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
245 amdgpu_fence_unref(&sync->sync_to[i]); 326 amdgpu_fence_unref(&sync->sync_to[i]);
246 327
247 amdgpu_fence_unref(&sync->last_vm_update); 328 fence_put(sync->last_vm_update);
248} 329}
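
The reworked sync code keeps at most one fence per fence context in the hash table and only replaces a stored fence with a later one, and both the last_vm_update path here and amdgpu_vm_flush below decide "later" with wrap-safe unsigned arithmetic on 32-bit seqnos, (a - b) <= INT_MAX. A small stand-alone model of just that comparison, with illustrative names rather than the kernel helpers:

#include <stdint.h>
#include <limits.h>
#include <stdio.h>

/* Wrap-safe "a is not older than b" on 32-bit sequence numbers, mirroring
 * the (f->seqno - tmp->seqno <= INT_MAX) checks in this patch. */
static int seq_is_later_or_equal(uint32_t a, uint32_t b)
{
        return (uint32_t)(a - b) <= (uint32_t)INT_MAX;
}

int main(void)
{
        printf("%d\n", seq_is_later_or_equal(10, 5));            /* 1: plain case   */
        printf("%d\n", seq_is_later_or_equal(3, 0xfffffff0u));   /* 1: survives wrap */
        printf("%d\n", seq_is_later_or_equal(0xfffffff0u, 3));   /* 0: older fence   */
        return 0;
}
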
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 962dd5552137..f80b1a43be8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
77 void *gtt_map, *vram_map; 77 void *gtt_map, *vram_map;
78 void **gtt_start, **gtt_end; 78 void **gtt_start, **gtt_end;
79 void **vram_start, **vram_end; 79 void **vram_start, **vram_end;
80 struct amdgpu_fence *fence = NULL; 80 struct fence *fence = NULL;
81 81
82 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, 82 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
83 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); 83 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
@@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
116 goto out_lclean_unpin; 116 goto out_lclean_unpin;
117 } 117 }
118 118
119 r = fence_wait(&fence->base, false); 119 r = fence_wait(fence, false);
120 if (r) { 120 if (r) {
121 DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); 121 DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
122 goto out_lclean_unpin; 122 goto out_lclean_unpin;
123 } 123 }
124 124
125 amdgpu_fence_unref(&fence); 125 fence_put(fence);
126 126
127 r = amdgpu_bo_kmap(vram_obj, &vram_map); 127 r = amdgpu_bo_kmap(vram_obj, &vram_map);
128 if (r) { 128 if (r) {
@@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
161 goto out_lclean_unpin; 161 goto out_lclean_unpin;
162 } 162 }
163 163
164 r = fence_wait(&fence->base, false); 164 r = fence_wait(fence, false);
165 if (r) { 165 if (r) {
166 DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); 166 DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
167 goto out_lclean_unpin; 167 goto out_lclean_unpin;
168 } 168 }
169 169
170 amdgpu_fence_unref(&fence); 170 fence_put(fence);
171 171
172 r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); 172 r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
173 if (r) { 173 if (r) {
@@ -214,7 +214,7 @@ out_lclean:
214 amdgpu_bo_unref(&gtt_obj[i]); 214 amdgpu_bo_unref(&gtt_obj[i]);
215 } 215 }
216 if (fence) 216 if (fence)
217 amdgpu_fence_unref(&fence); 217 fence_put(fence);
218 break; 218 break;
219 } 219 }
220 220
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dd3415d2e45d..399143541d8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
228 struct amdgpu_device *adev; 228 struct amdgpu_device *adev;
229 struct amdgpu_ring *ring; 229 struct amdgpu_ring *ring;
230 uint64_t old_start, new_start; 230 uint64_t old_start, new_start;
231 struct amdgpu_fence *fence; 231 struct fence *fence;
232 int r; 232 int r;
233 233
234 adev = amdgpu_get_adev(bo->bdev); 234 adev = amdgpu_get_adev(bo->bdev);
@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
269 new_mem->num_pages * PAGE_SIZE, /* bytes */ 269 new_mem->num_pages * PAGE_SIZE, /* bytes */
270 bo->resv, &fence); 270 bo->resv, &fence);
271 /* FIXME: handle copy error */ 271 /* FIXME: handle copy error */
272 r = ttm_bo_move_accel_cleanup(bo, &fence->base, 272 r = ttm_bo_move_accel_cleanup(bo, fence,
273 evict, no_wait_gpu, new_mem); 273 evict, no_wait_gpu, new_mem);
274 amdgpu_fence_unref(&fence); 274 fence_put(fence);
275 return r; 275 return r;
276} 276}
277 277
@@ -987,46 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
987 uint64_t dst_offset, 987 uint64_t dst_offset,
988 uint32_t byte_count, 988 uint32_t byte_count,
989 struct reservation_object *resv, 989 struct reservation_object *resv,
990 struct amdgpu_fence **fence) 990 struct fence **fence)
991{ 991{
992 struct amdgpu_device *adev = ring->adev; 992 struct amdgpu_device *adev = ring->adev;
993 struct amdgpu_sync sync;
994 uint32_t max_bytes; 993 uint32_t max_bytes;
995 unsigned num_loops, num_dw; 994 unsigned num_loops, num_dw;
995 struct amdgpu_ib *ib;
996 unsigned i; 996 unsigned i;
997 int r; 997 int r;
998 998
999 /* sync other rings */
1000 amdgpu_sync_create(&sync);
1001 if (resv) {
1002 r = amdgpu_sync_resv(adev, &sync, resv, false);
1003 if (r) {
1004 DRM_ERROR("sync failed (%d).\n", r);
1005 amdgpu_sync_free(adev, &sync, NULL);
1006 return r;
1007 }
1008 }
1009
1010 max_bytes = adev->mman.buffer_funcs->copy_max_bytes; 999 max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
1011 num_loops = DIV_ROUND_UP(byte_count, max_bytes); 1000 num_loops = DIV_ROUND_UP(byte_count, max_bytes);
1012 num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; 1001 num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
1013 1002
1014 /* for fence and sync */ 1003 /* for IB padding */
1015 num_dw += 64 + AMDGPU_NUM_SYNCS * 8; 1004 while (num_dw & 0x7)
1005 num_dw++;
1006
1007 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
1008 if (!ib)
1009 return -ENOMEM;
1016 1010
1017 r = amdgpu_ring_lock(ring, num_dw); 1011 r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
1018 if (r) { 1012 if (r) {
1019 DRM_ERROR("ring lock failed (%d).\n", r); 1013 kfree(ib);
1020 amdgpu_sync_free(adev, &sync, NULL);
1021 return r; 1014 return r;
1022 } 1015 }
1023 1016
1024 amdgpu_sync_rings(&sync, ring); 1017 ib->length_dw = 0;
1018
1019 if (resv) {
1020 r = amdgpu_sync_resv(adev, &ib->sync, resv,
1021 AMDGPU_FENCE_OWNER_UNDEFINED);
1022 if (r) {
1023 DRM_ERROR("sync failed (%d).\n", r);
1024 goto error_free;
1025 }
1026 }
1025 1027
1026 for (i = 0; i < num_loops; i++) { 1028 for (i = 0; i < num_loops; i++) {
1027 uint32_t cur_size_in_bytes = min(byte_count, max_bytes); 1029 uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1028 1030
1029 amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset, 1031 amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
1030 cur_size_in_bytes); 1032 cur_size_in_bytes);
1031 1033
1032 src_offset += cur_size_in_bytes; 1034 src_offset += cur_size_in_bytes;
@@ -1034,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
1034 byte_count -= cur_size_in_bytes; 1036 byte_count -= cur_size_in_bytes;
1035 } 1037 }
1036 1038
1037 r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence); 1039 amdgpu_vm_pad_ib(adev, ib);
1038 if (r) { 1040 WARN_ON(ib->length_dw > num_dw);
1039 amdgpu_ring_unlock_undo(ring); 1041 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
1040 amdgpu_sync_free(adev, &sync, NULL); 1042 &amdgpu_vm_free_job,
1041 return r; 1043 AMDGPU_FENCE_OWNER_MOVE,
1042 } 1044 fence);
1043 1045 if (r)
1044 amdgpu_ring_unlock_commit(ring); 1046 goto error_free;
1045 amdgpu_sync_free(adev, &sync, *fence);
1046 1047
1048 if (!amdgpu_enable_scheduler) {
1049 amdgpu_ib_free(adev, ib);
1050 kfree(ib);
1051 }
1047 return 0; 1052 return 0;
1053error_free:
1054 amdgpu_ib_free(adev, ib);
1055 kfree(ib);
1056 return r;
1048} 1057}
1049 1058
1050#if defined(CONFIG_DEBUG_FS) 1059#if defined(CONFIG_DEBUG_FS)
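
amdgpu_copy_buffer now sizes an indirect buffer instead of writing the ring directly: one copy packet per chunk of at most copy_max_bytes, then the dword count is padded up to a multiple of 8 before amdgpu_ib_get(). A quick stand-alone check of that arithmetic; the constants in main() are only illustrative stand-ins for the per-asic buffer_funcs values.

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Size an IB for a linear copy, mirroring the logic in amdgpu_copy_buffer:
 * one packet per max_bytes-sized chunk, padded to a multiple of 8 dwords. */
static unsigned copy_ib_num_dw(uint32_t byte_count, uint32_t max_bytes,
                               unsigned copy_num_dw)
{
        unsigned num_loops = DIV_ROUND_UP(byte_count, max_bytes);
        unsigned num_dw = num_loops * copy_num_dw;

        while (num_dw & 0x7)    /* IB padding */
                num_dw++;
        return num_dw;
}

int main(void)
{
        /* Illustrative values: 1 MiB and 8 MiB copies, 0x1fffff bytes and
         * 7 dwords per copy packet. */
        printf("%u\n", copy_ib_num_dw(1 << 20, 0x1fffff, 7));  /* 8  */
        printf("%u\n", copy_ib_num_dw(8 << 20, 0x1fffff, 7));  /* 40 */
        return 0;
}
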
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 68369cf1e318..b87355ccfb1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -807,7 +807,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
807} 807}
808 808
809static int amdgpu_uvd_free_job( 809static int amdgpu_uvd_free_job(
810 struct amdgpu_cs_parser *sched_job) 810 struct amdgpu_job *sched_job)
811{ 811{
812 amdgpu_ib_free(sched_job->adev, sched_job->ibs); 812 amdgpu_ib_free(sched_job->adev, sched_job->ibs);
813 kfree(sched_job->ibs); 813 kfree(sched_job->ibs);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 33ee6ae28f37..1a984c934b1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -340,7 +340,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
340} 340}
341 341
342static int amdgpu_vce_free_job( 342static int amdgpu_vce_free_job(
343 struct amdgpu_cs_parser *sched_job) 343 struct amdgpu_job *sched_job)
344{ 344{
345 amdgpu_ib_free(sched_job->adev, sched_job->ibs); 345 amdgpu_ib_free(sched_job->adev, sched_job->ibs);
346 kfree(sched_job->ibs); 346 kfree(sched_job->ibs);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a78a206e176e..83b7ce6f5f72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -200,19 +200,29 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
200 */ 200 */
201void amdgpu_vm_flush(struct amdgpu_ring *ring, 201void amdgpu_vm_flush(struct amdgpu_ring *ring,
202 struct amdgpu_vm *vm, 202 struct amdgpu_vm *vm,
203 struct amdgpu_fence *updates) 203 struct fence *updates)
204{ 204{
205 uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); 205 uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
206 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; 206 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
207 struct amdgpu_fence *flushed_updates = vm_id->flushed_updates; 207 struct fence *flushed_updates = vm_id->flushed_updates;
208 bool is_earlier = false;
209
210 if (flushed_updates && updates) {
211 BUG_ON(flushed_updates->context != updates->context);
212 is_earlier = (updates->seqno - flushed_updates->seqno <=
213 INT_MAX) ? true : false;
214 }
208 215
209 if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || 216 if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
210 (updates && amdgpu_fence_is_earlier(flushed_updates, updates))) { 217 is_earlier) {
211 218
212 trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); 219 trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
213 vm_id->flushed_updates = amdgpu_fence_ref( 220 if (is_earlier) {
214 amdgpu_fence_later(flushed_updates, updates)); 221 vm_id->flushed_updates = fence_get(updates);
215 amdgpu_fence_unref(&flushed_updates); 222 fence_put(flushed_updates);
223 }
224 if (!flushed_updates)
225 vm_id->flushed_updates = fence_get(updates);
216 vm_id->pd_gpu_addr = pd_addr; 226 vm_id->pd_gpu_addr = pd_addr;
217 amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); 227 amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
218 } 228 }
@@ -306,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
306 } 316 }
307} 317}
308 318
309static int amdgpu_vm_free_job( 319int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
310 struct amdgpu_cs_parser *sched_job)
311{ 320{
312 int i; 321 int i;
313 for (i = 0; i < sched_job->num_ibs; i++) 322 for (i = 0; i < sched_job->num_ibs; i++)
@@ -1347,7 +1356,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1347 fence_put(vm->page_directory_fence); 1356 fence_put(vm->page_directory_fence);
1348 1357
1349 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 1358 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
1350 amdgpu_fence_unref(&vm->ids[i].flushed_updates); 1359 fence_put(vm->ids[i].flushed_updates);
1351 amdgpu_fence_unref(&vm->ids[i].last_id_use); 1360 amdgpu_fence_unref(&vm->ids[i].last_id_use);
1352 } 1361 }
1353 1362
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 2b4242b39b0a..3920c1e346f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
630 gpu_addr = adev->wb.gpu_addr + (index * 4); 630 gpu_addr = adev->wb.gpu_addr + (index * 4);
631 tmp = 0xCAFEDEAD; 631 tmp = 0xCAFEDEAD;
632 adev->wb.wb[index] = cpu_to_le32(tmp); 632 adev->wb.wb[index] = cpu_to_le32(tmp);
633 memset(&ib, 0, sizeof(ib));
633 r = amdgpu_ib_get(ring, NULL, 256, &ib); 634 r = amdgpu_ib_get(ring, NULL, 256, &ib);
634 if (r) { 635 if (r) {
635 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 636 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -1338,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
1338 * Used by the amdgpu ttm implementation to move pages if 1339 * Used by the amdgpu ttm implementation to move pages if
1339 * registered as the asic copy callback. 1340 * registered as the asic copy callback.
1340 */ 1341 */
1341static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring, 1342static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
1342 uint64_t src_offset, 1343 uint64_t src_offset,
1343 uint64_t dst_offset, 1344 uint64_t dst_offset,
1344 uint32_t byte_count) 1345 uint32_t byte_count)
1345{ 1346{
1346 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); 1347 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
1347 amdgpu_ring_write(ring, byte_count); 1348 ib->ptr[ib->length_dw++] = byte_count;
1348 amdgpu_ring_write(ring, 0); /* src/dst endian swap */ 1349 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1349 amdgpu_ring_write(ring, lower_32_bits(src_offset)); 1350 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1350 amdgpu_ring_write(ring, upper_32_bits(src_offset)); 1351 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1351 amdgpu_ring_write(ring, lower_32_bits(dst_offset)); 1352 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1352 amdgpu_ring_write(ring, upper_32_bits(dst_offset)); 1353 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1353} 1354}
1354 1355
1355/** 1356/**
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 9b0cab413677..fab7b236f37f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
2660 return r; 2660 return r;
2661 } 2661 }
2662 WREG32(scratch, 0xCAFEDEAD); 2662 WREG32(scratch, 0xCAFEDEAD);
2663 memset(&ib, 0, sizeof(ib));
2663 r = amdgpu_ib_get(ring, NULL, 256, &ib); 2664 r = amdgpu_ib_get(ring, NULL, 256, &ib);
2664 if (r) { 2665 if (r) {
2665 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 2666 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4b68e6306f40..818edb37fa9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
622 return r; 622 return r;
623 } 623 }
624 WREG32(scratch, 0xCAFEDEAD); 624 WREG32(scratch, 0xCAFEDEAD);
625 memset(&ib, 0, sizeof(ib));
625 r = amdgpu_ib_get(ring, NULL, 256, &ib); 626 r = amdgpu_ib_get(ring, NULL, 256, &ib);
626 if (r) { 627 if (r) {
627 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 628 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 9de8104eddeb..715e02d3bfba 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
689 gpu_addr = adev->wb.gpu_addr + (index * 4); 689 gpu_addr = adev->wb.gpu_addr + (index * 4);
690 tmp = 0xCAFEDEAD; 690 tmp = 0xCAFEDEAD;
691 adev->wb.wb[index] = cpu_to_le32(tmp); 691 adev->wb.wb[index] = cpu_to_le32(tmp);
692 memset(&ib, 0, sizeof(ib));
692 r = amdgpu_ib_get(ring, NULL, 256, &ib); 693 r = amdgpu_ib_get(ring, NULL, 256, &ib);
693 if (r) { 694 if (r) {
694 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 695 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -1349,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
1349 * Used by the amdgpu ttm implementation to move pages if 1350 * Used by the amdgpu ttm implementation to move pages if
1350 * registered as the asic copy callback. 1351 * registered as the asic copy callback.
1351 */ 1352 */
1352static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, 1353static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
1353 uint64_t src_offset, 1354 uint64_t src_offset,
1354 uint64_t dst_offset, 1355 uint64_t dst_offset,
1355 uint32_t byte_count) 1356 uint32_t byte_count)
1356{ 1357{
1357 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1358 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1358 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); 1359 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1359 amdgpu_ring_write(ring, byte_count); 1360 ib->ptr[ib->length_dw++] = byte_count;
1360 amdgpu_ring_write(ring, 0); /* src/dst endian swap */ 1361 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1361 amdgpu_ring_write(ring, lower_32_bits(src_offset)); 1362 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1362 amdgpu_ring_write(ring, upper_32_bits(src_offset)); 1363 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1363 amdgpu_ring_write(ring, lower_32_bits(dst_offset)); 1364 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1364 amdgpu_ring_write(ring, upper_32_bits(dst_offset)); 1365 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1365} 1366}
1366 1367
1367/** 1368/**
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 029f3455f9f9..67128c8e78b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
810 gpu_addr = adev->wb.gpu_addr + (index * 4); 810 gpu_addr = adev->wb.gpu_addr + (index * 4);
811 tmp = 0xCAFEDEAD; 811 tmp = 0xCAFEDEAD;
812 adev->wb.wb[index] = cpu_to_le32(tmp); 812 adev->wb.wb[index] = cpu_to_le32(tmp);
813 memset(&ib, 0, sizeof(ib));
813 r = amdgpu_ib_get(ring, NULL, 256, &ib); 814 r = amdgpu_ib_get(ring, NULL, 256, &ib);
814 if (r) { 815 if (r) {
815 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 816 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -1473,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
1473 * Used by the amdgpu ttm implementation to move pages if 1474 * Used by the amdgpu ttm implementation to move pages if
1474 * registered as the asic copy callback. 1475 * registered as the asic copy callback.
1475 */ 1476 */
1476static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring, 1477static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
1477 uint64_t src_offset, 1478 uint64_t src_offset,
1478 uint64_t dst_offset, 1479 uint64_t dst_offset,
1479 uint32_t byte_count) 1480 uint32_t byte_count)
1480{ 1481{
1481 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1482 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1482 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); 1483 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1483 amdgpu_ring_write(ring, byte_count); 1484 ib->ptr[ib->length_dw++] = byte_count;
1484 amdgpu_ring_write(ring, 0); /* src/dst endian swap */ 1485 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1485 amdgpu_ring_write(ring, lower_32_bits(src_offset)); 1486 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1486 amdgpu_ring_write(ring, upper_32_bits(src_offset)); 1487 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1487 amdgpu_ring_write(ring, lower_32_bits(dst_offset)); 1488 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1488 amdgpu_ring_write(ring, upper_32_bits(dst_offset)); 1489 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1489} 1490}
1490 1491
1491/** 1492/**
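
The copy emitters in the three SDMA hunks above switch from amdgpu_ring_write() to appending dwords to the IB via ib->ptr[ib->length_dw++]. A tiny stand-alone model of building one 7-dword linear-copy packet into a buffer; the header constant is a placeholder, not the real SDMA packet encoding.

#include <stdio.h>
#include <stdint.h>

struct fake_ib {
        uint32_t ptr[64];
        unsigned length_dw;
};

/* Append a 7-dword "copy" packet in the style of the reworked
 * *_emit_copy_buffer() helpers. 0xC0DE0001 is a made-up header. */
static void emit_copy(struct fake_ib *ib, uint64_t src, uint64_t dst,
                      uint32_t byte_count)
{
        ib->ptr[ib->length_dw++] = 0xC0DE0001u;           /* packet header        */
        ib->ptr[ib->length_dw++] = byte_count;
        ib->ptr[ib->length_dw++] = 0;                     /* src/dst endian swap  */
        ib->ptr[ib->length_dw++] = (uint32_t)src;         /* lower_32_bits(src)   */
        ib->ptr[ib->length_dw++] = (uint32_t)(src >> 32); /* upper_32_bits(src)   */
        ib->ptr[ib->length_dw++] = (uint32_t)dst;         /* lower_32_bits(dst)   */
        ib->ptr[ib->length_dw++] = (uint32_t)(dst >> 32); /* upper_32_bits(dst)   */
}

int main(void)
{
        struct fake_ib ib = { .length_dw = 0 };

        emit_copy(&ib, 0x100000000ULL, 0x200001000ULL, 4096);
        printf("packet uses %u dwords\n", ib.length_dw);   /* 7 */
        return 0;
}
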
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 265d3e2f63cc..d99fe90991dc 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -27,30 +27,32 @@
27#include <drm/drmP.h> 27#include <drm/drmP.h>
28#include "gpu_scheduler.h" 28#include "gpu_scheduler.h"
29 29
30static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
31
30/* Initialize a given run queue struct */ 32/* Initialize a given run queue struct */
31static void amd_sched_rq_init(struct amd_sched_rq *rq) 33static void amd_sched_rq_init(struct amd_sched_rq *rq)
32{ 34{
35 spin_lock_init(&rq->lock);
33 INIT_LIST_HEAD(&rq->entities); 36 INIT_LIST_HEAD(&rq->entities);
34 mutex_init(&rq->lock);
35 rq->current_entity = NULL; 37 rq->current_entity = NULL;
36} 38}
37 39
38static void amd_sched_rq_add_entity(struct amd_sched_rq *rq, 40static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
39 struct amd_sched_entity *entity) 41 struct amd_sched_entity *entity)
40{ 42{
41 mutex_lock(&rq->lock); 43 spin_lock(&rq->lock);
42 list_add_tail(&entity->list, &rq->entities); 44 list_add_tail(&entity->list, &rq->entities);
43 mutex_unlock(&rq->lock); 45 spin_unlock(&rq->lock);
44} 46}
45 47
46static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, 48static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
47 struct amd_sched_entity *entity) 49 struct amd_sched_entity *entity)
48{ 50{
49 mutex_lock(&rq->lock); 51 spin_lock(&rq->lock);
50 list_del_init(&entity->list); 52 list_del_init(&entity->list);
51 if (rq->current_entity == entity) 53 if (rq->current_entity == entity)
52 rq->current_entity = NULL; 54 rq->current_entity = NULL;
53 mutex_unlock(&rq->lock); 55 spin_unlock(&rq->lock);
54} 56}
55 57
56/** 58/**
@@ -61,12 +63,16 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
61static struct amd_sched_entity * 63static struct amd_sched_entity *
62amd_sched_rq_select_entity(struct amd_sched_rq *rq) 64amd_sched_rq_select_entity(struct amd_sched_rq *rq)
63{ 65{
64 struct amd_sched_entity *entity = rq->current_entity; 66 struct amd_sched_entity *entity;
67
68 spin_lock(&rq->lock);
65 69
70 entity = rq->current_entity;
66 if (entity) { 71 if (entity) {
67 list_for_each_entry_continue(entity, &rq->entities, list) { 72 list_for_each_entry_continue(entity, &rq->entities, list) {
68 if (!kfifo_is_empty(&entity->job_queue)) { 73 if (!kfifo_is_empty(&entity->job_queue)) {
69 rq->current_entity = entity; 74 rq->current_entity = entity;
75 spin_unlock(&rq->lock);
70 return rq->current_entity; 76 return rq->current_entity;
71 } 77 }
72 } 78 }
@@ -76,6 +82,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
76 82
77 if (!kfifo_is_empty(&entity->job_queue)) { 83 if (!kfifo_is_empty(&entity->job_queue)) {
78 rq->current_entity = entity; 84 rq->current_entity = entity;
85 spin_unlock(&rq->lock);
79 return rq->current_entity; 86 return rq->current_entity;
80 } 87 }
81 88
@@ -83,76 +90,9 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
83 break; 90 break;
84 } 91 }
85 92
86 return NULL; 93 spin_unlock(&rq->lock);
87}
88 94
89/** 95 return NULL;
90 * Note: This function should only been called inside scheduler main
91 * function for thread safety, there is no other protection here.
92 * return ture if scheduler has something ready to run.
93 *
94 * For active_hw_rq, there is only one producer(scheduler thread) and
95 * one consumer(ISR). It should be safe to use this function in scheduler
96 * main thread to decide whether to continue emit more IBs.
97*/
98static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
99{
100 unsigned long flags;
101 bool full;
102
103 spin_lock_irqsave(&sched->queue_lock, flags);
104 full = atomic64_read(&sched->hw_rq_count) <
105 sched->hw_submission_limit ? true : false;
106 spin_unlock_irqrestore(&sched->queue_lock, flags);
107
108 return full;
109}
110
111/**
112 * Select next entity from the kernel run queue, if not available,
113 * return null.
114*/
115static struct amd_sched_entity *
116kernel_rq_select_context(struct amd_gpu_scheduler *sched)
117{
118 struct amd_sched_entity *sched_entity;
119 struct amd_sched_rq *rq = &sched->kernel_rq;
120
121 mutex_lock(&rq->lock);
122 sched_entity = amd_sched_rq_select_entity(rq);
123 mutex_unlock(&rq->lock);
124 return sched_entity;
125}
126
127/**
128 * Select next entity containing real IB submissions
129*/
130static struct amd_sched_entity *
131select_context(struct amd_gpu_scheduler *sched)
132{
133 struct amd_sched_entity *wake_entity = NULL;
134 struct amd_sched_entity *tmp;
135 struct amd_sched_rq *rq;
136
137 if (!is_scheduler_ready(sched))
138 return NULL;
139
140 /* Kernel run queue has higher priority than normal run queue*/
141 tmp = kernel_rq_select_context(sched);
142 if (tmp != NULL)
143 goto exit;
144
145 rq = &sched->sched_rq;
146 mutex_lock(&rq->lock);
147 tmp = amd_sched_rq_select_entity(rq);
148 mutex_unlock(&rq->lock);
149exit:
150 if (sched->current_entity && (sched->current_entity != tmp))
151 wake_entity = sched->current_entity;
152 sched->current_entity = tmp;
153 if (wake_entity && wake_entity->need_wakeup)
154 wake_up(&wake_entity->wait_queue);
155 return tmp;
156} 96}
157 97
158/** 98/**
@@ -171,31 +111,20 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
171 struct amd_sched_rq *rq, 111 struct amd_sched_rq *rq,
172 uint32_t jobs) 112 uint32_t jobs)
173{ 113{
174 uint64_t seq_ring = 0;
175 char name[20];
176
177 if (!(sched && entity && rq)) 114 if (!(sched && entity && rq))
178 return -EINVAL; 115 return -EINVAL;
179 116
180 memset(entity, 0, sizeof(struct amd_sched_entity)); 117 memset(entity, 0, sizeof(struct amd_sched_entity));
181 seq_ring = ((uint64_t)sched->ring_id) << 60;
182 spin_lock_init(&entity->lock);
183 entity->belongto_rq = rq; 118 entity->belongto_rq = rq;
184 entity->scheduler = sched; 119 entity->scheduler = sched;
185 init_waitqueue_head(&entity->wait_queue);
186 init_waitqueue_head(&entity->wait_emit);
187 entity->fence_context = fence_context_alloc(1); 120 entity->fence_context = fence_context_alloc(1);
188 snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
189 memcpy(entity->name, name, 20);
190 entity->need_wakeup = false;
191 if(kfifo_alloc(&entity->job_queue, 121 if(kfifo_alloc(&entity->job_queue,
192 jobs * sizeof(void *), 122 jobs * sizeof(void *),
193 GFP_KERNEL)) 123 GFP_KERNEL))
194 return -EINVAL; 124 return -EINVAL;
195 125
196 spin_lock_init(&entity->queue_lock); 126 spin_lock_init(&entity->queue_lock);
197 atomic64_set(&entity->last_queued_v_seq, seq_ring); 127 atomic_set(&entity->fence_seq, 0);
198 atomic64_set(&entity->last_signaled_v_seq, seq_ring);
199 128
200 /* Add the entity to the run queue */ 129 /* Add the entity to the run queue */
201 amd_sched_rq_add_entity(rq, entity); 130 amd_sched_rq_add_entity(rq, entity);
@@ -210,23 +139,24 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
210 * 139 *
211 * return true if entity is initialized, false otherwise 140 * return true if entity is initialized, false otherwise
212*/ 141*/
213static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched, 142static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
214 struct amd_sched_entity *entity) 143 struct amd_sched_entity *entity)
215{ 144{
216 return entity->scheduler == sched && 145 return entity->scheduler == sched &&
217 entity->belongto_rq != NULL; 146 entity->belongto_rq != NULL;
218} 147}
219 148
220static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, 149/**
221 struct amd_sched_entity *entity) 150 * Check if entity is idle
151 *
152 * @entity The pointer to a valid scheduler entity
153 *
154 * Return true if entity don't has any unscheduled jobs.
155 */
156static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
222{ 157{
223 /** 158 rmb();
224 * Idle means no pending IBs, and the entity is not 159 if (kfifo_is_empty(&entity->job_queue))
225 * currently being used.
226 */
227 barrier();
228 if ((sched->current_entity != entity) &&
229 kfifo_is_empty(&entity->job_queue))
230 return true; 160 return true;
231 161
232 return false; 162 return false;
@@ -238,84 +168,114 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
238 * @sched Pointer to scheduler instance 168 * @sched Pointer to scheduler instance
239 * @entity The pointer to a valid scheduler entity 169 * @entity The pointer to a valid scheduler entity
240 * 170 *
241 * return 0 if succeed. negative error code on failure 171 * Cleanup and free the allocated resources.
242 */ 172 */
243int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, 173void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
244 struct amd_sched_entity *entity) 174 struct amd_sched_entity *entity)
245{ 175{
246 int r = 0;
247 struct amd_sched_rq *rq = entity->belongto_rq; 176 struct amd_sched_rq *rq = entity->belongto_rq;
248 177
249 if (!is_context_entity_initialized(sched, entity)) 178 if (!amd_sched_entity_is_initialized(sched, entity))
250 return 0; 179 return;
251 entity->need_wakeup = true; 180
252 /** 181 /**
253 * The client will not queue more IBs during this fini, consume existing 182 * The client will not queue more IBs during this fini, consume existing
254 * queued IBs 183 * queued IBs
255 */ 184 */
256 r = wait_event_timeout( 185 wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
257 entity->wait_queue,
258 is_context_entity_idle(sched, entity),
259 msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
260 ) ? 0 : -1;
261
262 if (r) {
263 if (entity->is_pending)
264 DRM_INFO("Entity %p is in waiting state during fini,\
265 all pending ibs will be canceled.\n",
266 entity);
267 }
268 186
269 amd_sched_rq_remove_entity(rq, entity); 187 amd_sched_rq_remove_entity(rq, entity);
270 kfifo_free(&entity->job_queue); 188 kfifo_free(&entity->job_queue);
271 return r;
272} 189}
273 190
274/** 191/**
275 * Submit a normal job to the job queue 192 * Helper to submit a job to the job queue
276 * 193 *
277 * @sched The pointer to the scheduler
278 * @c_entity The pointer to amd_sched_entity
279 * @job The pointer to job required to submit 194 * @job The pointer to job required to submit
280 * return 0 if succeed. -1 if failed. 195 *
281 * -2 indicate queue is full for this client, client should wait untill 196 * Returns true if we could submit the job.
282 * scheduler consum some queued command. 197 */
283 * -1 other fail. 198static bool amd_sched_entity_in(struct amd_sched_job *job)
284*/ 199{
285int amd_sched_push_job(struct amd_gpu_scheduler *sched, 200 struct amd_sched_entity *entity = job->s_entity;
286 struct amd_sched_entity *c_entity, 201 bool added, first = false;
287 void *data, 202
288 struct amd_sched_fence **fence) 203 spin_lock(&entity->queue_lock);
204 added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job);
205
206 if (added && kfifo_len(&entity->job_queue) == sizeof(job))
207 first = true;
208
209 spin_unlock(&entity->queue_lock);
210
211 /* first job wakes up scheduler */
212 if (first)
213 amd_sched_wakeup(job->sched);
214
215 return added;
216}
217
218/**
219 * Submit a job to the job queue
220 *
221 * @job The pointer to job required to submit
222 *
223 * Returns 0 for success, negative error code otherwise.
224 */
225int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
289{ 226{
290 struct amd_sched_job *job; 227 struct amd_sched_entity *entity = sched_job->s_entity;
228 struct amd_sched_fence *fence = amd_sched_fence_create(
229 entity, sched_job->owner);
230 int r;
291 231
292 if (!fence) 232 if (!fence)
293 return -EINVAL;
294 job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
295 if (!job)
296 return -ENOMEM; 233 return -ENOMEM;
297 job->sched = sched; 234
298 job->s_entity = c_entity; 235 fence_get(&fence->base);
299 job->data = data; 236 sched_job->s_fence = fence;
300 *fence = amd_sched_fence_create(c_entity); 237
301 if ((*fence) == NULL) { 238 r = wait_event_interruptible(entity->scheduler->job_scheduled,
302 kfree(job); 239 amd_sched_entity_in(sched_job));
303 return -EINVAL; 240
304 } 241 return r;
305 fence_get(&(*fence)->base); 242}
306 job->s_fence = *fence; 243
307 while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), 244/**
308 &c_entity->queue_lock) != sizeof(void *)) { 245 * Return ture if we can push more jobs to the hw.
309 /** 246 */
310 * Current context used up all its IB slots 247static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
311 * wait here, or need to check whether GPU is hung 248{
312 */ 249 return atomic_read(&sched->hw_rq_count) <
313 schedule(); 250 sched->hw_submission_limit;
314 } 251}
315 /* first job wake up scheduler */ 252
316 if ((kfifo_len(&c_entity->job_queue) / sizeof(void *)) == 1) 253/**
317 wake_up_interruptible(&sched->wait_queue); 254 * Wake up the scheduler when it is ready
318 return 0; 255 */
256static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
257{
258 if (amd_sched_ready(sched))
259 wake_up_interruptible(&sched->wake_up_worker);
260}
261
262/**
263 * Select next entity containing real IB submissions
264*/
265static struct amd_sched_entity *
266amd_sched_select_context(struct amd_gpu_scheduler *sched)
267{
268 struct amd_sched_entity *tmp;
269
270 if (!amd_sched_ready(sched))
271 return NULL;
272
273 /* Kernel run queue has higher priority than normal run queue*/
274 tmp = amd_sched_rq_select_entity(&sched->kernel_rq);
275 if (tmp == NULL)
276 tmp = amd_sched_rq_select_entity(&sched->sched_rq);
277
278 return tmp;
319} 279}
320 280
321static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) 281static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
@@ -323,52 +283,41 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
323 struct amd_sched_job *sched_job = 283 struct amd_sched_job *sched_job =
324 container_of(cb, struct amd_sched_job, cb); 284 container_of(cb, struct amd_sched_job, cb);
325 struct amd_gpu_scheduler *sched; 285 struct amd_gpu_scheduler *sched;
326 unsigned long flags;
327 286
328 sched = sched_job->sched; 287 sched = sched_job->sched;
329 atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
330 sched_job->s_fence->v_seq);
331 amd_sched_fence_signal(sched_job->s_fence); 288 amd_sched_fence_signal(sched_job->s_fence);
332 spin_lock_irqsave(&sched->queue_lock, flags); 289 atomic_dec(&sched->hw_rq_count);
333 list_del(&sched_job->list);
334 atomic64_dec(&sched->hw_rq_count);
335 spin_unlock_irqrestore(&sched->queue_lock, flags);
336
337 sched->ops->process_job(sched, sched_job);
338 fence_put(&sched_job->s_fence->base); 290 fence_put(&sched_job->s_fence->base);
339 kfree(sched_job); 291 sched->ops->process_job(sched_job);
340 wake_up_interruptible(&sched->wait_queue); 292 wake_up_interruptible(&sched->wake_up_worker);
341} 293}
342 294
343static int amd_sched_main(void *param) 295static int amd_sched_main(void *param)
344{ 296{
345 int r;
346 struct amd_sched_job *job;
347 struct sched_param sparam = {.sched_priority = 1}; 297 struct sched_param sparam = {.sched_priority = 1};
348 struct amd_sched_entity *c_entity = NULL;
349 struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; 298 struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
299 int r;
350 300
351 sched_setscheduler(current, SCHED_FIFO, &sparam); 301 sched_setscheduler(current, SCHED_FIFO, &sparam);
352 302
353 while (!kthread_should_stop()) { 303 while (!kthread_should_stop()) {
304 struct amd_sched_entity *c_entity = NULL;
305 struct amd_sched_job *job;
354 struct fence *fence; 306 struct fence *fence;
355 307
356 wait_event_interruptible(sched->wait_queue, 308 wait_event_interruptible(sched->wake_up_worker,
357 is_scheduler_ready(sched) && 309 kthread_should_stop() ||
358 (c_entity = select_context(sched))); 310 (c_entity = amd_sched_select_context(sched)));
311
312 if (!c_entity)
313 continue;
314
359 r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *)); 315 r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
360 if (r != sizeof(void *)) 316 if (r != sizeof(void *))
361 continue; 317 continue;
362 r = sched->ops->prepare_job(sched, c_entity, job); 318 atomic_inc(&sched->hw_rq_count);
363 if (!r) { 319
364 unsigned long flags; 320 fence = sched->ops->run_job(job);
365 spin_lock_irqsave(&sched->queue_lock, flags);
366 list_add_tail(&job->list, &sched->active_hw_rq);
367 atomic64_inc(&sched->hw_rq_count);
368 spin_unlock_irqrestore(&sched->queue_lock, flags);
369 }
370 mutex_lock(&sched->sched_lock);
371 fence = sched->ops->run_job(sched, c_entity, job);
372 if (fence) { 321 if (fence) {
373 r = fence_add_callback(fence, &job->cb, 322 r = fence_add_callback(fence, &job->cb,
374 amd_sched_process_job); 323 amd_sched_process_job);
@@ -378,7 +327,8 @@ static int amd_sched_main(void *param)
378 DRM_ERROR("fence add callback failed (%d)\n", r); 327 DRM_ERROR("fence add callback failed (%d)\n", r);
379 fence_put(fence); 328 fence_put(fence);
380 } 329 }
381 mutex_unlock(&sched->sched_lock); 330
331 wake_up(&sched->job_scheduled);
382 } 332 }
383 return 0; 333 return 0;
384} 334}
@@ -386,53 +336,42 @@ static int amd_sched_main(void *param)
386/** 336/**
387 * Create a gpu scheduler 337 * Create a gpu scheduler
388 * 338 *
389 * @device The device context for this scheduler 339 * @ops The backend operations for this scheduler.
390 * @ops The backend operations for this scheduler. 340 * @ring The the ring id for the scheduler.
391 * @id The scheduler is per ring, here is ring id. 341 * @hw_submissions Number of hw submissions to do.
392 * @granularity The minumum ms unit the scheduler will scheduled.
393 * @preemption Indicate whether this ring support preemption, 0 is no.
394 * 342 *
395 * return the pointer to scheduler for success, otherwise return NULL 343 * Return the pointer to scheduler for success, otherwise return NULL
396*/ 344*/
397struct amd_gpu_scheduler *amd_sched_create(void *device, 345struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops,
398 struct amd_sched_backend_ops *ops, 346 unsigned ring, unsigned hw_submission,
399 unsigned ring, 347 void *priv)
400 unsigned granularity,
401 unsigned preemption,
402 unsigned hw_submission)
403{ 348{
404 struct amd_gpu_scheduler *sched; 349 struct amd_gpu_scheduler *sched;
405 char name[20];
406 350
407 sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); 351 sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
408 if (!sched) 352 if (!sched)
409 return NULL; 353 return NULL;
410 354
411 sched->device = device;
412 sched->ops = ops; 355 sched->ops = ops;
413 sched->granularity = granularity;
414 sched->ring_id = ring; 356 sched->ring_id = ring;
415 sched->preemption = preemption;
416 sched->hw_submission_limit = hw_submission; 357 sched->hw_submission_limit = hw_submission;
417 snprintf(name, sizeof(name), "gpu_sched[%d]", ring); 358 sched->priv = priv;
418 mutex_init(&sched->sched_lock); 359 snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring);
419 spin_lock_init(&sched->queue_lock);
420 amd_sched_rq_init(&sched->sched_rq); 360 amd_sched_rq_init(&sched->sched_rq);
421 amd_sched_rq_init(&sched->kernel_rq); 361 amd_sched_rq_init(&sched->kernel_rq);
422 362
423 init_waitqueue_head(&sched->wait_queue); 363 init_waitqueue_head(&sched->wake_up_worker);
424 INIT_LIST_HEAD(&sched->active_hw_rq); 364 init_waitqueue_head(&sched->job_scheduled);
425 atomic64_set(&sched->hw_rq_count, 0); 365 atomic_set(&sched->hw_rq_count, 0);
426 /* Each scheduler will run on a seperate kernel thread */ 366 /* Each scheduler will run on a seperate kernel thread */
427 sched->thread = kthread_create(amd_sched_main, sched, name); 367 sched->thread = kthread_run(amd_sched_main, sched, sched->name);
428 if (sched->thread) { 368 if (IS_ERR(sched->thread)) {
429 wake_up_process(sched->thread); 369 DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
430 return sched; 370 kfree(sched);
371 return NULL;
431 } 372 }
432 373
433 DRM_ERROR("Failed to create scheduler for id %d.\n", ring); 374 return sched;
434 kfree(sched);
435 return NULL;
436} 375}
437 376
438/** 377/**
@@ -448,15 +387,3 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched)
448 kfree(sched); 387 kfree(sched);
449 return 0; 388 return 0;
450} 389}
451
452/**
453 * Get next queued sequence number
454 *
455 * @entity The context entity
456 *
457 * return the next queued sequence number
458*/
459uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
460{
461 return atomic64_read(&c_entity->last_queued_v_seq) + 1;
462}
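
After this rewrite the scheduler thread sleeps until it is under the hardware submission limit and an entity has a queued job, bumps hw_rq_count before calling run_job, and the fence callback drops the count and wakes the worker again. A toy, single-threaded analogy of that in-flight accounting, with no kernel primitives and purely illustrative names:

#include <stdio.h>

#define HW_SUBMISSION_LIMIT 2   /* illustrative submission limit */

static int hw_rq_count;

static int sched_ready(void)
{
        /* Like amd_sched_ready(): is there room left on the hardware queue? */
        return hw_rq_count < HW_SUBMISSION_LIMIT;
}

static void run_job(int id)
{
        hw_rq_count++;                          /* inc before handing job to hw */
        printf("job %d pushed, in flight: %d\n", id, hw_rq_count);
}

static void job_completed(int id)
{
        hw_rq_count--;                          /* dec in the completion path */
        printf("job %d done,   in flight: %d\n", id, hw_rq_count);
}

int main(void)
{
        int next = 0, done = 0, total = 4;

        while (done < total) {
                if (next < total && sched_ready())
                        run_job(next++);        /* push while under the limit */
                else
                        job_completed(done++);  /* otherwise drain a completion */
        }
        return 0;
}
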
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index ceb5918bfbeb..e797796dcad7 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,8 +27,6 @@
27#include <linux/kfifo.h> 27#include <linux/kfifo.h>
28#include <linux/fence.h> 28#include <linux/fence.h>
29 29
30#define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
31
32struct amd_gpu_scheduler; 30struct amd_gpu_scheduler;
33struct amd_sched_rq; 31struct amd_sched_rq;
34 32
@@ -41,20 +39,12 @@ struct amd_sched_rq;
41struct amd_sched_entity { 39struct amd_sched_entity {
42 struct list_head list; 40 struct list_head list;
43 struct amd_sched_rq *belongto_rq; 41 struct amd_sched_rq *belongto_rq;
44 spinlock_t lock; 42 atomic_t fence_seq;
45 /* the virtual_seq is unique per context per ring */
46 atomic64_t last_queued_v_seq;
47 atomic64_t last_signaled_v_seq;
48 /* the job_queue maintains the jobs submitted by clients */ 43 /* the job_queue maintains the jobs submitted by clients */
49 struct kfifo job_queue; 44 struct kfifo job_queue;
50 spinlock_t queue_lock; 45 spinlock_t queue_lock;
51 struct amd_gpu_scheduler *scheduler; 46 struct amd_gpu_scheduler *scheduler;
52 wait_queue_head_t wait_queue;
53 wait_queue_head_t wait_emit;
54 bool is_pending;
55 uint64_t fence_context; 47 uint64_t fence_context;
56 char name[20];
57 bool need_wakeup;
58}; 48};
59 49
60/** 50/**
@@ -63,26 +53,24 @@ struct amd_sched_entity {
63 * the next entity to emit commands from. 53 * the next entity to emit commands from.
64*/ 54*/
65struct amd_sched_rq { 55struct amd_sched_rq {
66 struct mutex lock; 56 spinlock_t lock;
67 struct list_head entities; 57 struct list_head entities;
68 struct amd_sched_entity *current_entity; 58 struct amd_sched_entity *current_entity;
69}; 59};
70 60
71struct amd_sched_fence { 61struct amd_sched_fence {
72 struct fence base; 62 struct fence base;
73 struct fence_cb cb; 63 struct amd_gpu_scheduler *scheduler;
74 struct amd_sched_entity *entity;
75 uint64_t v_seq;
76 spinlock_t lock; 64 spinlock_t lock;
65 void *owner;
77}; 66};
78 67
79struct amd_sched_job { 68struct amd_sched_job {
80 struct list_head list;
81 struct fence_cb cb; 69 struct fence_cb cb;
82 struct amd_gpu_scheduler *sched; 70 struct amd_gpu_scheduler *sched;
83 struct amd_sched_entity *s_entity; 71 struct amd_sched_entity *s_entity;
84 void *data;
85 struct amd_sched_fence *s_fence; 72 struct amd_sched_fence *s_fence;
73 void *owner;
86}; 74};
87 75
88extern const struct fence_ops amd_sched_fence_ops; 76extern const struct fence_ops amd_sched_fence_ops;
@@ -101,61 +89,42 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
101 * these functions should be implemented in driver side 89 * these functions should be implemented in driver side
102*/ 90*/
103struct amd_sched_backend_ops { 91struct amd_sched_backend_ops {
104 int (*prepare_job)(struct amd_gpu_scheduler *sched, 92 struct fence *(*run_job)(struct amd_sched_job *job);
105 struct amd_sched_entity *c_entity, 93 void (*process_job)(struct amd_sched_job *job);
106 struct amd_sched_job *job);
107 struct fence *(*run_job)(struct amd_gpu_scheduler *sched,
108 struct amd_sched_entity *c_entity,
109 struct amd_sched_job *job);
110 void (*process_job)(struct amd_gpu_scheduler *sched,
111 struct amd_sched_job *job);
112}; 94};
113 95
114/** 96/**
115 * One scheduler is implemented for each hardware ring 97 * One scheduler is implemented for each hardware ring
116*/ 98*/
117struct amd_gpu_scheduler { 99struct amd_gpu_scheduler {
118 void *device;
119 struct task_struct *thread; 100 struct task_struct *thread;
120 struct amd_sched_rq sched_rq; 101 struct amd_sched_rq sched_rq;
121 struct amd_sched_rq kernel_rq; 102 struct amd_sched_rq kernel_rq;
122 struct list_head active_hw_rq; 103 atomic_t hw_rq_count;
123 atomic64_t hw_rq_count;
124 struct amd_sched_backend_ops *ops; 104 struct amd_sched_backend_ops *ops;
125 uint32_t ring_id; 105 uint32_t ring_id;
126 uint32_t granularity; /* in ms unit */ 106 wait_queue_head_t wake_up_worker;
127 uint32_t preemption; 107 wait_queue_head_t job_scheduled;
128 wait_queue_head_t wait_queue;
129 struct amd_sched_entity *current_entity;
130 struct mutex sched_lock;
131 spinlock_t queue_lock;
132 uint32_t hw_submission_limit; 108 uint32_t hw_submission_limit;
109 char name[20];
110 void *priv;
133}; 111};
134 112
135struct amd_gpu_scheduler *amd_sched_create(void *device, 113struct amd_gpu_scheduler *
136 struct amd_sched_backend_ops *ops, 114amd_sched_create(struct amd_sched_backend_ops *ops,
137 uint32_t ring, 115 uint32_t ring, uint32_t hw_submission, void *priv);
138 uint32_t granularity,
139 uint32_t preemption,
140 uint32_t hw_submission);
141int amd_sched_destroy(struct amd_gpu_scheduler *sched); 116int amd_sched_destroy(struct amd_gpu_scheduler *sched);
142 117
143int amd_sched_push_job(struct amd_gpu_scheduler *sched,
144 struct amd_sched_entity *c_entity,
145 void *data,
146 struct amd_sched_fence **fence);
147
148int amd_sched_entity_init(struct amd_gpu_scheduler *sched, 118int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
149 struct amd_sched_entity *entity, 119 struct amd_sched_entity *entity,
150 struct amd_sched_rq *rq, 120 struct amd_sched_rq *rq,
151 uint32_t jobs); 121 uint32_t jobs);
152int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, 122void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
153 struct amd_sched_entity *entity); 123 struct amd_sched_entity *entity);
154 124int amd_sched_entity_push_job(struct amd_sched_job *sched_job);
155uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity);
156 125
157struct amd_sched_fence *amd_sched_fence_create( 126struct amd_sched_fence *amd_sched_fence_create(
158 struct amd_sched_entity *s_entity); 127 struct amd_sched_entity *s_entity, void *owner);
159void amd_sched_fence_signal(struct amd_sched_fence *fence); 128void amd_sched_fence_signal(struct amd_sched_fence *fence);
160 129
161 130
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index a4751598c0b4..e62c37920e11 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -27,19 +27,22 @@
27#include <drm/drmP.h> 27#include <drm/drmP.h>
28#include "gpu_scheduler.h" 28#include "gpu_scheduler.h"
29 29
30struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity) 30struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner)
31{ 31{
32 struct amd_sched_fence *fence = NULL; 32 struct amd_sched_fence *fence = NULL;
33 unsigned seq;
34
33 fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); 35 fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL);
34 if (fence == NULL) 36 if (fence == NULL)
35 return NULL; 37 return NULL;
36 fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); 38 fence->owner = owner;
37 fence->entity = s_entity; 39 fence->scheduler = s_entity->scheduler;
38 spin_lock_init(&fence->lock); 40 spin_lock_init(&fence->lock);
39 fence_init(&fence->base, &amd_sched_fence_ops, 41
40 &fence->lock, 42 seq = atomic_inc_return(&s_entity->fence_seq);
41 s_entity->fence_context, 43 fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
42 fence->v_seq); 44 s_entity->fence_context, seq);
45
43 return fence; 46 return fence;
44} 47}
45 48
@@ -60,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence)
60static const char *amd_sched_fence_get_timeline_name(struct fence *f) 63static const char *amd_sched_fence_get_timeline_name(struct fence *f)
61{ 64{
62 struct amd_sched_fence *fence = to_amd_sched_fence(f); 65 struct amd_sched_fence *fence = to_amd_sched_fence(f);
63 return (const char *)fence->entity->name; 66 return (const char *)fence->scheduler->name;
64} 67}
65 68
66static bool amd_sched_fence_enable_signaling(struct fence *f) 69static bool amd_sched_fence_enable_signaling(struct fence *f)
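
Scheduler fences now take their sequence number from a per-entity atomic counter rather than the old per-ring v_seq, so each entity owns a monotonically increasing timeline under its fence_context. A minimal C11 model of that per-entity counter; the struct and function names are illustrative only.

#include <stdatomic.h>
#include <stdio.h>

struct entity {
        unsigned long long fence_context;  /* stands in for fence_context_alloc(1) */
        atomic_uint fence_seq;             /* like amd_sched_entity::fence_seq     */
};

/* Hand out the next seqno on this entity's timeline, as
 * amd_sched_fence_create() now does with atomic_inc_return(). */
static unsigned next_seq(struct entity *e)
{
        return atomic_fetch_add(&e->fence_seq, 1) + 1;
}

int main(void)
{
        struct entity a = { .fence_context = 1 }, b = { .fence_context = 2 };

        atomic_init(&a.fence_seq, 0);
        atomic_init(&b.fence_seq, 0);

        printf("ctx %llu seq %u\n", a.fence_context, next_seq(&a)); /* 1 */
        printf("ctx %llu seq %u\n", a.fence_context, next_seq(&a)); /* 2 */
        printf("ctx %llu seq %u\n", b.fence_context, next_seq(&b)); /* 1 */
        return 0;
}
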
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 94b21ae70ef7..5a2cafb4f1bc 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector)
95 if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { 95 if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
96 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 96 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
97 } else if (radeon_dp_needs_link_train(radeon_connector)) { 97 } else if (radeon_dp_needs_link_train(radeon_connector)) {
98 /* Don't try to start link training before we
99 * have the dpcd */
100 if (!radeon_dp_getdpcd(radeon_connector))
101 return;
102
98 /* set it to OFF so that drm_helper_connector_dpms() 103 /* set it to OFF so that drm_helper_connector_dpms()
99 * won't return immediately since the current state 104 * won't return immediately since the current state
100 * is ON at this point. 105 * is ON at this point.