Diffstat (limited to 'drivers/gpu/drm/amd')

 drivers/gpu/drm/amd/amdgpu/amdgpu.h             | 120
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c          | 177
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c       | 101
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c         |  24
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c          |   4
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h      |   7
 drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c          |   3
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c       |  30
 drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c   |   2
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c        |  10
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h       |  94
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c         |   6
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c          | 138
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c             |   8
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c           | 302
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c           |  11
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c           |   9
 drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  24
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   |  24
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |   6
 drivers/gpu/drm/amd/scheduler/sched_fence.c     |  10
 21 files changed, 716 insertions(+), 394 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 615ce6d464fb..306f75700bf8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -389,7 +389,6 @@ struct amdgpu_clock {
  * Fences.
  */
 struct amdgpu_fence_driver {
-	struct amdgpu_ring		*ring;
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
@@ -398,7 +397,7 @@ struct amdgpu_fence_driver {
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
-	struct delayed_work		lockup_work;
+	struct timer_list		fallback_timer;
 	wait_queue_head_t		fence_queue;
 };
 
@@ -917,8 +916,8 @@ struct amdgpu_ring {
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
 struct amdgpu_vm_pt {
-	struct amdgpu_bo	*bo;
-	uint64_t		addr;
+	struct amdgpu_bo		*bo;
+	uint64_t			addr;
 };
 
 struct amdgpu_vm_id {
@@ -926,8 +925,6 @@ struct amdgpu_vm_id {
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	struct fence		*flushed_updates;
-	/* last use of vmid */
-	struct fence		*last_id_use;
 };
 
 struct amdgpu_vm {
@@ -957,24 +954,70 @@ struct amdgpu_vm {
 
 	/* for id and flush management per ring */
 	struct amdgpu_vm_id	ids[AMDGPU_MAX_RINGS];
+	/* for interval tree */
+	spinlock_t		it_lock;
 };
 
 struct amdgpu_vm_manager {
-	struct fence			*active[AMDGPU_NUM_VM];
-	uint32_t			max_pfn;
+	struct {
+		struct fence		*active;
+		atomic_long_t		owner;
+	} ids[AMDGPU_NUM_VM];
+
+	uint32_t			max_pfn;
 	/* number of VMIDs */
 	unsigned			nvm;
 	/* vram base address for page table entry */
 	u64				vram_base_offset;
 	/* is vm enabled? */
 	bool				enabled;
-	/* for hw to save the PD addr on suspend/resume */
-	uint32_t			saved_table_addr[AMDGPU_NUM_VM];
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs	*vm_pte_funcs;
 	struct amdgpu_ring		*vm_pte_funcs_ring;
 };
 
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
+					       struct amdgpu_vm *vm,
+					       struct list_head *head);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		      struct amdgpu_sync *sync);
+void amdgpu_vm_flush(struct amdgpu_ring *ring,
+		     struct amdgpu_vm *vm,
+		     struct fence *updates);
+void amdgpu_vm_fence(struct amdgpu_device *adev,
+		     struct amdgpu_vm *vm,
+		     struct fence *fence);
+uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm);
+int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			     struct amdgpu_sync *sync);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+			struct amdgpu_bo_va *bo_va,
+			struct ttm_mem_reg *mem);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+			     struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+				       struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+				      struct amdgpu_vm *vm,
+				      struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+		     struct amdgpu_bo_va *bo_va,
+		     uint64_t addr, uint64_t offset,
+		     uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+		       struct amdgpu_bo_va *bo_va,
+		       uint64_t addr);
+void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+		      struct amdgpu_bo_va *bo_va);
+int amdgpu_vm_free_job(struct amdgpu_job *job);
+
 /*
  * context related structures
  */
@@ -1211,6 +1254,7 @@ struct amdgpu_cs_parser {
 	/* relocations */
 	struct amdgpu_bo_list_entry	*vm_bos;
 	struct list_head	validated;
+	struct fence		*fence;
 
 	struct amdgpu_ib	*ibs;
 	uint32_t		num_ibs;
@@ -1226,7 +1270,7 @@ struct amdgpu_job {
 	struct amdgpu_device	*adev;
 	struct amdgpu_ib	*ibs;
 	uint32_t		num_ibs;
-	struct mutex		job_lock;
+	void			*owner;
 	struct amdgpu_user_fence uf;
 	int (*free_job)(struct amdgpu_job *job);
 };
@@ -2257,11 +2301,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_card_posted(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
-						 struct drm_file *filp,
-						 struct amdgpu_ctx *ctx,
-						 struct amdgpu_ib *ibs,
-						 uint32_t num_ibs);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2319,49 +2358,6 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
 			      unsigned long arg);
 
 /*
- * vm
- */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
-					       struct amdgpu_vm *vm,
-					       struct list_head *head);
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync);
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     struct amdgpu_vm *vm,
-		     struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
-		     struct amdgpu_vm *vm,
-		     struct amdgpu_fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm);
-int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
-			  struct amdgpu_vm *vm);
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
-			     struct amdgpu_vm *vm, struct amdgpu_sync *sync);
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
-			struct amdgpu_bo_va *bo_va,
-			struct ttm_mem_reg *mem);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
-				       struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
-				      struct amdgpu_vm *vm,
-				      struct amdgpu_bo *bo);
-int amdgpu_vm_bo_map(struct amdgpu_device *adev,
-		     struct amdgpu_bo_va *bo_va,
-		     uint64_t addr, uint64_t offset,
-		     uint64_t size, uint32_t flags);
-int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
-		       struct amdgpu_bo_va *bo_va,
-		       uint64_t addr);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
-		      struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
-/*
  * functions used by amdgpu_encoder.c
  */
 struct amdgpu_afmt_acr {
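
Note on the amdgpu.h changes above: the flat active[] fence array becomes a
per-VMID {active, owner} pair, which lets a VM test ownership of a VMID with
a single atomic read. A minimal sketch of that pattern, using simplified
stand-ins for the amdgpu structures (not driver code):

	#include <linux/atomic.h>

	struct vmid_slot {
		struct fence *active;	/* fence of the VMID's last user */
		atomic_long_t owner;	/* (long) cast of the owning VM */
	};

	/* Fast path: atomic_long_read() here pairs with the
	 * atomic_long_set() done when the VMID is fenced, so no
	 * spinlock is needed to revalidate a cached VMID. */
	static bool vmid_still_owned(struct vmid_slot *slot, void *vm)
	{
		return atomic_long_read(&slot->owner) == (long)vm;
	}
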
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dfc4d02c7a38..3afcf0237c25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -127,30 +127,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 	return 0;
 }
 
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
-						 struct drm_file *filp,
-						 struct amdgpu_ctx *ctx,
-						 struct amdgpu_ib *ibs,
-						 uint32_t num_ibs)
-{
-	struct amdgpu_cs_parser *parser;
-	int i;
-
-	parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
-	if (!parser)
-		return NULL;
-
-	parser->adev = adev;
-	parser->filp = filp;
-	parser->ctx = ctx;
-	parser->ibs = ibs;
-	parser->num_ibs = num_ibs;
-	for (i = 0; i < num_ibs; i++)
-		ibs[i].ctx = ctx;
-
-	return parser;
-}
-
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	union drm_amdgpu_cs *cs = data;
@@ -463,8 +439,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
 }
 
-static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser:	parser structure holding parsing context.
+ * @error:	error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
 {
+	unsigned i;
+
 	if (!error) {
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
@@ -479,17 +465,14 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err
 		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
 
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
-					    &parser->ibs[parser->num_ibs-1].fence->base);
+					    parser->fence);
 	} else if (backoff) {
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
-}
+	fence_put(parser->fence);
 
-static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
-{
-	unsigned i;
 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
 	if (parser->bo_list)
@@ -499,31 +482,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
-	if (!amdgpu_enable_scheduler)
-	{
-		if (parser->ibs)
-			for (i = 0; i < parser->num_ibs; i++)
-				amdgpu_ib_free(parser->adev, &parser->ibs[i]);
-		kfree(parser->ibs);
-		if (parser->uf.bo)
-			drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
-	}
-
-	kfree(parser);
-}
-
-/**
- * cs_parser_fini() - clean parser states
- * @parser:	parser structure holding parsing context.
- * @error:	error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
-{
-	amdgpu_cs_parser_fini_early(parser, error, backoff);
-	amdgpu_cs_parser_fini_late(parser);
+	if (parser->ibs)
+		for (i = 0; i < parser->num_ibs; i++)
+			amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+	kfree(parser->ibs);
+	if (parser->uf.bo)
+		drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
 }
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -610,15 +574,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	}
 
 	r = amdgpu_bo_vm_update_pte(parser, vm);
-	if (r) {
-		goto out;
-	}
-	amdgpu_cs_sync_rings(parser);
-	if (!amdgpu_enable_scheduler)
-		r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
-				       parser->filp);
+	if (!r)
+		amdgpu_cs_sync_rings(parser);
 
-out:
 	return r;
 }
 
@@ -828,36 +786,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_cs_parser *parser;
+	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
 	int i, r;
 
 	if (!adev->accel_working)
 		return -EBUSY;
 
-	parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
-	if (!parser)
-		return -ENOMEM;
-	r = amdgpu_cs_parser_init(parser, data);
+	parser.adev = adev;
+	parser.filp = filp;
+
+	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
 		DRM_ERROR("Failed to initialize parser !\n");
-		amdgpu_cs_parser_fini(parser, r, false);
+		amdgpu_cs_parser_fini(&parser, r, false);
 		r = amdgpu_cs_handle_lockup(adev, r);
 		return r;
 	}
 	mutex_lock(&vm->mutex);
-	r = amdgpu_cs_parser_relocs(parser);
+	r = amdgpu_cs_parser_relocs(&parser);
 	if (r == -ENOMEM)
 		DRM_ERROR("Not enough memory for command submission!\n");
 	else if (r && r != -ERESTARTSYS)
 		DRM_ERROR("Failed to process the buffer list %d!\n", r);
 	else if (!r) {
 		reserved_buffers = true;
-		r = amdgpu_cs_ib_fill(adev, parser);
+		r = amdgpu_cs_ib_fill(adev, &parser);
 	}
 
 	if (!r) {
-		r = amdgpu_cs_dependencies(adev, parser);
+		r = amdgpu_cs_dependencies(adev, &parser);
 		if (r)
 			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
 	}
@@ -865,62 +823,71 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r)
 		goto out;
 
-	for (i = 0; i < parser->num_ibs; i++)
-		trace_amdgpu_cs(parser, i);
+	for (i = 0; i < parser.num_ibs; i++)
+		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, parser);
+	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
 	if (r)
 		goto out;
 
-	if (amdgpu_enable_scheduler && parser->num_ibs) {
+	if (amdgpu_enable_scheduler && parser.num_ibs) {
+		struct amdgpu_ring * ring = parser.ibs->ring;
+		struct amd_sched_fence *fence;
 		struct amdgpu_job *job;
-		struct amdgpu_ring * ring = parser->ibs->ring;
+
 		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
 		if (!job) {
 			r = -ENOMEM;
 			goto out;
 		}
+
 		job->base.sched = &ring->sched;
-		job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
-		job->adev = parser->adev;
-		job->ibs = parser->ibs;
-		job->num_ibs = parser->num_ibs;
-		job->base.owner = parser->filp;
-		mutex_init(&job->job_lock);
+		job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
+		job->adev = parser.adev;
+		job->owner = parser.filp;
+		job->free_job = amdgpu_cs_free_job;
+
+		job->ibs = parser.ibs;
+		job->num_ibs = parser.num_ibs;
+		parser.ibs = NULL;
+		parser.num_ibs = 0;
+
 		if (job->ibs[job->num_ibs - 1].user) {
-			memcpy(&job->uf, &parser->uf,
-			       sizeof(struct amdgpu_user_fence));
+			job->uf = parser.uf;
 			job->ibs[job->num_ibs - 1].user = &job->uf;
+			parser.uf.bo = NULL;
 		}
 
-		job->free_job = amdgpu_cs_free_job;
-		mutex_lock(&job->job_lock);
-		r = amd_sched_entity_push_job(&job->base);
-		if (r) {
-			mutex_unlock(&job->job_lock);
+		fence = amd_sched_fence_create(job->base.s_entity,
+					       parser.filp);
+		if (!fence) {
+			r = -ENOMEM;
 			amdgpu_cs_free_job(job);
 			kfree(job);
 			goto out;
 		}
-		cs->out.handle =
-			amdgpu_ctx_add_fence(parser->ctx, ring,
-					     &job->base.s_fence->base);
-		parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+		job->base.s_fence = fence;
+		parser.fence = fence_get(&fence->base);
 
-		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-		ttm_eu_fence_buffer_objects(&parser->ticket,
-					    &parser->validated,
-					    &job->base.s_fence->base);
+		cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
+						      &fence->base);
+		job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
 
-		mutex_unlock(&job->job_lock);
-		amdgpu_cs_parser_fini_late(parser);
-		mutex_unlock(&vm->mutex);
-		return 0;
+		trace_amdgpu_cs_ioctl(job);
+		amd_sched_entity_push_job(&job->base);
+
+	} else {
+		struct amdgpu_fence *fence;
+
+		r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
+				       parser.filp);
+		fence = parser.ibs[parser.num_ibs - 1].fence;
+		parser.fence = fence_get(&fence->base);
+		cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
 	}
 
-	cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
 out:
-	amdgpu_cs_parser_fini(parser, r, reserved_buffers);
+	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	mutex_unlock(&vm->mutex);
 	r = amdgpu_cs_handle_lockup(adev, r);
 	return r;
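
The amdgpu_cs.c rework above puts the parser on the ioctl stack and funnels
both submission paths through a single parser.fence reference. A sketch of
the reference discipline, assuming a fence f produced by either path:

	/* +1: the parser keeps its own reference until cleanup. */
	parser.fence = fence_get(f);

	/* ... buffers are fenced with parser.fence, the handle is
	 * returned to userspace ... */

	/* -1: the one fence_put() in amdgpu_cs_parser_fini() balances
	 * the get, on both the success and the error path. */
	fence_put(parser.fence);
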
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 257d72205bb5..3671f9f220bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,6 +47,9 @@
  * that the the relevant GPU caches have been flushed.
  */
 
+static struct kmem_cache *amdgpu_fence_slab;
+static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+
 /**
  * amdgpu_fence_write - write a fence value
  *
@@ -85,24 +88,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 }
 
 /**
- * amdgpu_fence_schedule_check - schedule lockup check
- *
- * @ring: pointer to struct amdgpu_ring
- *
- * Queues a delayed work item to check for lockups.
- */
-static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
-{
-	/*
-	 * Do not reset the timer here with mod_delayed_work,
-	 * this can livelock in an interaction with TTM delayed destroy.
-	 */
-	queue_delayed_work(system_power_efficient_wq,
-			   &ring->fence_drv.lockup_work,
-			   AMDGPU_FENCE_JIFFIES_TIMEOUT);
-}
-
-/**
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
  * @ring: ring the fence is associated with
@@ -118,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
 	struct amdgpu_device *adev = ring->adev;
 
 	/* we are protected by the ring emission mutex */
-	*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+	*fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
 	if ((*fence) == NULL) {
 		return -ENOMEM;
 	}
@@ -132,11 +117,23 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       (*fence)->seq,
 			       AMDGPU_FENCE_FLAG_INT);
-	trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
 	return 0;
 }
 
 /**
+ * amdgpu_fence_schedule_fallback - schedule fallback check
+ *
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Start a timer as fallback to our interrupts.
+ */
+static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
+{
+	mod_timer(&ring->fence_drv.fallback_timer,
+		  jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
+}
+
+/**
  * amdgpu_fence_activity - check for fence activity
  *
  * @ring: pointer to struct amdgpu_ring
@@ -202,45 +199,38 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
 	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
 
 	if (seq < last_emitted)
-		amdgpu_fence_schedule_check(ring);
+		amdgpu_fence_schedule_fallback(ring);
 
 	return wake;
 }
 
 /**
- * amdgpu_fence_check_lockup - check for hardware lockup
+ * amdgpu_fence_process - process a fence
  *
- * @work: delayed work item
+ * @adev: amdgpu_device pointer
+ * @ring: ring index the fence is associated with
  *
- * Checks for fence activity and if there is none probe
- * the hardware if a lockup occured.
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
  */
-static void amdgpu_fence_check_lockup(struct work_struct *work)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
-	struct amdgpu_fence_driver *fence_drv;
-	struct amdgpu_ring *ring;
-
-	fence_drv = container_of(work, struct amdgpu_fence_driver,
-				 lockup_work.work);
-	ring = fence_drv->ring;
-
 	if (amdgpu_fence_activity(ring))
 		wake_up_all(&ring->fence_drv.fence_queue);
 }
 
 /**
- * amdgpu_fence_process - process a fence
+ * amdgpu_fence_fallback - fallback for hardware interrupts
  *
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
+ * @work: delayed work item
  *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
+ * Checks for fence activity.
  */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
+static void amdgpu_fence_fallback(unsigned long arg)
 {
-	if (amdgpu_fence_activity(ring))
-		wake_up_all(&ring->fence_drv.fence_queue);
+	struct amdgpu_ring *ring = (void *)arg;
+
+	amdgpu_fence_process(ring);
 }
 
 /**
@@ -290,7 +280,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
 	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
 		return 0;
 
-	amdgpu_fence_schedule_check(ring);
+	amdgpu_fence_schedule_fallback(ring);
 	wait_event(ring->fence_drv.fence_queue, (
 		   (signaled = amdgpu_fence_seq_signaled(ring, seq))));
 
@@ -491,9 +481,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 	atomic64_set(&ring->fence_drv.last_seq, 0);
 	ring->fence_drv.initialized = false;
 
-	INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
-			  amdgpu_fence_check_lockup);
-	ring->fence_drv.ring = ring;
+	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
+		    (unsigned long)ring);
 
 	init_waitqueue_head(&ring->fence_drv.fence_queue);
 
@@ -536,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
  */
 int amdgpu_fence_driver_init(struct amdgpu_device *adev)
 {
+	if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
+		amdgpu_fence_slab = kmem_cache_create(
+			"amdgpu_fence", sizeof(struct amdgpu_fence), 0,
+			SLAB_HWCACHE_ALIGN, NULL);
+		if (!amdgpu_fence_slab)
+			return -ENOMEM;
+	}
 	if (amdgpu_debugfs_fence_init(adev))
 		dev_err(adev->dev, "fence debugfs file creation failed\n");
 
@@ -554,9 +550,12 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 {
 	int i, r;
 
+	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+		kmem_cache_destroy(amdgpu_fence_slab);
 	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
+
 		if (!ring || !ring->fence_drv.initialized)
 			continue;
 		r = amdgpu_fence_wait_empty(ring);
@@ -568,6 +567,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 		amd_sched_fini(&ring->sched);
+		del_timer_sync(&ring->fence_drv.fallback_timer);
 		ring->fence_drv.initialized = false;
 	}
 	mutex_unlock(&adev->ring_lock);
@@ -751,18 +751,25 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 	fence->fence_wake.func = amdgpu_fence_check_signaled;
 	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
 	fence_get(f);
-	amdgpu_fence_schedule_check(ring);
+	if (!timer_pending(&ring->fence_drv.fallback_timer))
+		amdgpu_fence_schedule_fallback(ring);
 	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 	return true;
 }
 
+static void amdgpu_fence_release(struct fence *f)
+{
+	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+	kmem_cache_free(amdgpu_fence_slab, fence);
+}
+
 const struct fence_ops amdgpu_fence_ops = {
 	.get_driver_name = amdgpu_fence_get_driver_name,
 	.get_timeline_name = amdgpu_fence_get_timeline_name,
 	.enable_signaling = amdgpu_fence_enable_signaling,
 	.signaled = amdgpu_fence_is_signaled,
 	.wait = fence_default_wait,
-	.release = NULL,
+	.release = amdgpu_fence_release,
 };
 
 /*
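
amdgpu_fence.c swaps the delayed-work lockup check for a plain kernel timer
and moves fence allocation into a slab cache. A condensed sketch of the
fallback-timer pattern (pre-4.15 timer API, matching this tree; my_ring and
my_process are hypothetical stand-ins):

	#include <linux/timer.h>

	static void my_fallback(unsigned long arg)
	{
		struct my_ring *ring = (void *)arg;

		my_process(ring);	/* same work the fence IRQ does */
	}

	static void my_ring_init(struct my_ring *ring)
	{
		setup_timer(&ring->fallback_timer, my_fallback,
			    (unsigned long)ring);
	}

	static void my_arm(struct my_ring *ring)
	{
		/* arm only when idle; mod_timer() pushes the deadline
		 * out, which is acceptable for a pure fallback path */
		if (!timer_pending(&ring->fallback_timer))
			mod_timer(&ring->fallback_timer, jiffies + HZ / 2);
	}

	static void my_ring_fini(struct my_ring *ring)
	{
		del_timer_sync(&ring->fallback_timer);	/* waits for handler */
	}
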
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 087332858853..00c5b580f56c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -483,6 +483,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 		if (domain == AMDGPU_GEM_DOMAIN_CPU)
 			goto error_unreserve;
 	}
+	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
+	if (r)
+		goto error_unreserve;
 
 	r = amdgpu_vm_clear_freed(adev, bo_va->vm);
 	if (r)
@@ -512,6 +515,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct amdgpu_bo *rbo;
 	struct amdgpu_bo_va *bo_va;
+	struct ttm_validate_buffer tv, tv_pd;
+	struct ww_acquire_ctx ticket;
+	struct list_head list, duplicates;
 	uint32_t invalid_flags, va_flags = 0;
 	int r = 0;
 
@@ -549,7 +555,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	mutex_lock(&fpriv->vm.mutex);
 	rbo = gem_to_amdgpu_bo(gobj);
-	r = amdgpu_bo_reserve(rbo, false);
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+	tv.bo = &rbo->tbo;
+	tv.shared = true;
+	list_add(&tv.head, &list);
+
+	if (args->operation == AMDGPU_VA_OP_MAP) {
+		tv_pd.bo = &fpriv->vm.page_directory->tbo;
+		tv_pd.shared = true;
+		list_add(&tv_pd.head, &list);
+	}
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r) {
 		mutex_unlock(&fpriv->vm.mutex);
 		drm_gem_object_unreference_unlocked(gobj);
@@ -558,7 +575,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo);
 	if (!bo_va) {
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
+		drm_gem_object_unreference_unlocked(gobj);
 		mutex_unlock(&fpriv->vm.mutex);
 		return -ENOENT;
 	}
@@ -581,7 +599,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	default:
 		break;
 	}
-
+	ttm_eu_backoff_reservation(&ticket, &list);
 	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
 		amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
 	mutex_unlock(&fpriv->vm.mutex);
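
The amdgpu_gem.c hunks replace a single-BO reserve with a TTM execbuf-util
validation list, so the BO and the VM page directory are locked together
under one ww_acquire_ctx. A distilled usage sketch of that pattern:

	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	LIST_HEAD(list);
	LIST_HEAD(duplicates);
	int r;

	tv.bo = &bo->tbo;	/* any number of BOs can be queued */
	tv.shared = true;
	list_add(&tv.head, &list);

	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
	if (r)
		return r;	/* nothing is left reserved on error */

	/* ... operate on all reserved BOs ... */

	ttm_eu_backoff_reservation(&ticket, &list);	/* release everything */
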
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index e65987743871..9e25edafa721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
 	int r;
 
 	if (size) {
-		r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+		r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
 				     &ib->sa_bo, size, 256);
 		if (r) {
 			dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
@@ -216,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 	}
 
 	if (ib->vm)
-		amdgpu_vm_fence(adev, ib->vm, ib->fence);
+		amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
 
 	amdgpu_ring_unlock_commit(ring);
 	return 0;
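
amdgpu_sa_bo_new() drops its amdgpu_device argument since the sub-allocator
manager already identifies the pool; only call sites change. A usage sketch
of the alloc/free pairing after the change (the fence here is whatever fence
gates reuse of the sub-allocation):

	struct amdgpu_sa_bo *sa_bo;
	int r;

	r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, &sa_bo, 256, 256);
	if (r)
		return r;
	/* ... use the sub-allocation ... */
	amdgpu_sa_bo_free(adev, &sa_bo, fence);	/* reuse gated on fence */
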
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3c2ff4567798..ea756e77b023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
 				      struct amdgpu_sa_manager *sa_manager);
 int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
 				 struct amdgpu_sa_manager *sa_manager);
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
-		     struct amdgpu_sa_manager *sa_manager,
-		     struct amdgpu_sa_bo **sa_bo,
-		     unsigned size, unsigned align);
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+		     struct amdgpu_sa_bo **sa_bo,
+		     unsigned size, unsigned align);
 void amdgpu_sa_bo_free(struct amdgpu_device *adev,
 			struct amdgpu_sa_bo **sa_bo,
 			struct fence *fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 0212b31dc194..8b88edb0434b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -311,8 +311,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 	return false;
 }
 
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
-		     struct amdgpu_sa_manager *sa_manager,
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index dcf4a8aca680..438c05254695 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 
 static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
 {
@@ -44,11 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
 		return NULL;
 	}
 	job = to_amdgpu_job(sched_job);
-	mutex_lock(&job->job_lock);
-	r = amdgpu_ib_schedule(job->adev,
-			       job->num_ibs,
-			       job->ibs,
-			       job->base.owner);
+	trace_amdgpu_sched_run_job(job);
+	r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
 	if (r) {
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
 		goto err;
@@ -61,8 +59,6 @@ err:
 	if (job->free_job)
 		job->free_job(job);
 
-	mutex_unlock(&job->job_lock);
-	fence_put(&job->base.s_fence->base);
 	kfree(job);
 	return fence ? &fence->base : NULL;
 }
@@ -88,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
 			return -ENOMEM;
 		job->base.sched = &ring->sched;
 		job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
+		job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+		if (!job->base.s_fence) {
+			kfree(job);
+			return -ENOMEM;
+		}
+		*f = fence_get(&job->base.s_fence->base);
+
 		job->adev = adev;
 		job->ibs = ibs;
 		job->num_ibs = num_ibs;
-		job->base.owner = owner;
-		mutex_init(&job->job_lock);
+		job->owner = owner;
 		job->free_job = free_job;
-		mutex_lock(&job->job_lock);
-		r = amd_sched_entity_push_job(&job->base);
-		if (r) {
-			mutex_unlock(&job->job_lock);
-			kfree(job);
-			return r;
-		}
-		*f = fence_get(&job->base.s_fence->base);
-		mutex_unlock(&job->job_lock);
+		amd_sched_entity_push_job(&job->base);
 	} else {
 		r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
 		if (r)
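
amdgpu_sched.c reorders submission so the scheduler fence exists, and the
caller already holds a reference to it, before the job is pushed; the push
can then no longer fail and the per-job mutex goes away. The resulting
idiom, distilled from the hunk above:

	job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
	if (!job->base.s_fence) {
		kfree(job);
		return -ENOMEM;
	}
	*f = fence_get(&job->base.s_fence->base);	/* caller's reference */

	amd_sched_entity_push_job(&job->base);	/* cannot fail anymore */
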
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index ff3ca52ec6fe..1caaf201b708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev,
 	if (*semaphore == NULL) {
 		return -ENOMEM;
 	}
-	r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+	r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
 			     &(*semaphore)->sa_bo, 8, 8);
 	if (r) {
 		kfree(*semaphore);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a6697fd05217..dd005c336c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -302,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
 			return -EINVAL;
 		}
 
-		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores ||
-		    (count >= AMDGPU_NUM_SYNCS)) {
+		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
+			r = fence_wait(&fence->base, true);
+			if (r)
+				return r;
+			continue;
+		}
+
+		if (count >= AMDGPU_NUM_SYNCS) {
 			/* not enough room, wait manually */
 			r = fence_wait(&fence->base, false);
 			if (r)
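
amdgpu_sync.c splits the scheduler case out of the semaphore-overflow
fallback so it can wait interruptibly (second argument of fence_wait()).
Illustrative call:

	r = fence_wait(&fence->base, true);	/* interruptible wait */
	if (r)
		return r;			/* may be -ERESTARTSYS */
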
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 76ecbaf72a2e..8f9834ab1bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
 			  __entry->fences)
 );
 
+TRACE_EVENT(amdgpu_cs_ioctl,
+	    TP_PROTO(struct amdgpu_job *job),
+	    TP_ARGS(job),
+	    TP_STRUCT__entry(
+			     __field(struct amdgpu_device *, adev)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct amdgpu_ib *, ib)
+			     __field(struct fence *, fence)
+			     __field(char *, ring_name)
+			     __field(u32, num_ibs)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->adev = job->adev;
+			   __entry->sched_job = &job->base;
+			   __entry->ib = job->ibs;
+			   __entry->fence = &job->base.s_fence->base;
+			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->num_ibs = job->num_ibs;
+			   ),
+	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+		      __entry->adev, __entry->sched_job, __entry->ib,
+		      __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+	    TP_PROTO(struct amdgpu_job *job),
+	    TP_ARGS(job),
+	    TP_STRUCT__entry(
+			     __field(struct amdgpu_device *, adev)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct amdgpu_ib *, ib)
+			     __field(struct fence *, fence)
+			     __field(char *, ring_name)
+			     __field(u32, num_ibs)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->adev = job->adev;
+			   __entry->sched_job = &job->base;
+			   __entry->ib = job->ibs;
+			   __entry->fence = &job->base.s_fence->base;
+			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->num_ibs = job->num_ibs;
+			   ),
+	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+		      __entry->adev, __entry->sched_job, __entry->ib,
+		      __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+
 TRACE_EVENT(amdgpu_vm_grab_id,
 	    TP_PROTO(unsigned vmid, int ring),
 	    TP_ARGS(vmid, ring),
@@ -196,49 +247,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
 	TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
 );
 
-DECLARE_EVENT_CLASS(amdgpu_fence_request,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno),
-
-	    TP_STRUCT__entry(
-			     __field(u32, dev)
-			     __field(int, ring)
-			     __field(u32, seqno)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->dev = dev->primary->index;
-			   __entry->ring = ring;
-			   __entry->seqno = seqno;
-			   ),
-
-	    TP_printk("dev=%u, ring=%d, seqno=%u",
-		      __entry->dev, __entry->ring, __entry->seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno)
-);
-
 DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
 
 	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
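
For reference, the anatomy shared by the two tracepoints added above,
reduced to a single field (a minimal sketch, not an additional driver
tracepoint):

	TRACE_EVENT(example_job_event,
		    TP_PROTO(struct amdgpu_job *job),
		    TP_ARGS(job),
		    TP_STRUCT__entry(
				     __field(u32, num_ibs)	/* copied at probe time */
				     ),
		    TP_fast_assign(
				   __entry->num_ibs = job->num_ibs;
				   ),
		    TP_printk("num_ibs:%u", __entry->num_ibs)
	);
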
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 81bb8e9fc26d..d4bac5f49939 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1073,10 +1073,10 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 	ret = drm_mm_dump_table(m, mm);
 	spin_unlock(&glob->lru_lock);
 	if (ttm_pl == TTM_PL_VRAM)
-		seq_printf(m, "man size:%llu pages, ram usage:%luMB, vis usage:%luMB\n",
+		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 			   adev->mman.bdev.man[ttm_pl].size,
-			   atomic64_read(&adev->vram_usage) >> 20,
-			   atomic64_read(&adev->vram_vis_usage) >> 20);
+			   (u64)atomic64_read(&adev->vram_usage) >> 20,
+			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
 	return ret;
 }
 
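
The format-string fix above matters because atomic64_read() returns the
architecture's 64-bit counter type: 'long' on 64-bit builds but 'long long'
on 32-bit ones, so neither %lu nor %llu matches everywhere without a cast.
The portable form:

	seq_printf(m, "ram usage:%lluMB\n",
		   (u64)atomic64_read(&adev->vram_usage) >> 20);
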
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 633a32a48560..159ce54bbd8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -143,10 +143,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	unsigned i;
 
 	/* check if the id is still valid */
-	if (vm_id->id && vm_id->last_id_use &&
-	    vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) {
-		trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
-		return 0;
+	if (vm_id->id) {
+		unsigned id = vm_id->id;
+		long owner;
+
+		owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
+		if (owner == (long)vm) {
+			trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
+			return 0;
+		}
 	}
 
 	/* we definately need to flush */
@@ -154,7 +159,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < adev->vm_manager.nvm; ++i) {
-		struct fence *fence = adev->vm_manager.active[i];
+		struct fence *fence = adev->vm_manager.ids[i].active;
 		struct amdgpu_ring *fring;
 
 		if (fence == NULL) {
@@ -176,7 +181,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (choices[i]) {
 		struct fence *fence;
 
-		fence = adev->vm_manager.active[choices[i]];
+		fence = adev->vm_manager.ids[choices[i]].active;
 		vm_id->id = choices[i];
 
 		trace_amdgpu_vm_grab_id(choices[i], ring->idx);
@@ -207,24 +212,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
 	struct fence *flushed_updates = vm_id->flushed_updates;
-	bool is_earlier = false;
-
-	if (flushed_updates && updates) {
-		BUG_ON(flushed_updates->context != updates->context);
-		is_earlier = (updates->seqno - flushed_updates->seqno <=
-			      INT_MAX) ? true : false;
-	}
+	bool is_later;
 
-	if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
-	    is_earlier) {
+	if (!flushed_updates)
+		is_later = true;
+	else if (!updates)
+		is_later = false;
+	else
+		is_later = fence_is_later(updates, flushed_updates);
 
+	if (pd_addr != vm_id->pd_gpu_addr || is_later) {
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-		if (is_earlier) {
+		if (is_later) {
 			vm_id->flushed_updates = fence_get(updates);
 			fence_put(flushed_updates);
 		}
-		if (!flushed_updates)
-			vm_id->flushed_updates = fence_get(updates);
 		vm_id->pd_gpu_addr = pd_addr;
 		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
 	}
@@ -244,16 +246,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
  */
 void amdgpu_vm_fence(struct amdgpu_device *adev,
 		     struct amdgpu_vm *vm,
-		     struct amdgpu_fence *fence)
+		     struct fence *fence)
 {
-	unsigned ridx = fence->ring->idx;
-	unsigned vm_id = vm->ids[ridx].id;
-
-	fence_put(adev->vm_manager.active[vm_id]);
-	adev->vm_manager.active[vm_id] = fence_get(&fence->base);
+	struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
+	unsigned vm_id = vm->ids[ring->idx].id;
 
-	fence_put(vm->ids[ridx].last_id_use);
-	vm->ids[ridx].last_id_use = fence_get(&fence->base);
+	fence_put(adev->vm_manager.ids[vm_id].active);
+	adev->vm_manager.ids[vm_id].active = fence_get(fence);
+	atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
 }
 
 /**
@@ -332,6 +332,8 @@ int amdgpu_vm_free_job(struct amdgpu_job *job)
  *
  * @adev: amdgpu_device pointer
  * @bo: bo to clear
+ *
+ * need to reserve bo first before calling it.
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_bo *bo)
@@ -343,24 +345,20 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	uint64_t addr;
 	int r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r)
-		return r;
-
 	r = reservation_object_reserve_shared(bo->tbo.resv);
 	if (r)
 		return r;
 
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 	if (r)
-		goto error_unreserve;
+		goto error;
 
 	addr = amdgpu_bo_gpu_offset(bo);
 	entries = amdgpu_bo_size(bo) / 8;
 
 	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
 	if (!ib)
-		goto error_unreserve;
+		goto error;
 
 	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
 	if (r)
@@ -378,16 +376,14 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (!r)
 		amdgpu_bo_fence(bo, fence, true);
 	fence_put(fence);
-	if (amdgpu_enable_scheduler) {
-		amdgpu_bo_unreserve(bo);
+	if (amdgpu_enable_scheduler)
 		return 0;
-	}
+
 error_free:
 	amdgpu_ib_free(adev, ib);
 	kfree(ib);
 
-error_unreserve:
-	amdgpu_bo_unreserve(bo);
+error:
 	return r;
 }
 
@@ -989,7 +985,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
  * Add a mapping of the BO at the specefied addr into the VM.
  * Returns 0 for success, error for failure.
  *
- * Object has to be reserved and gets unreserved by this function!
+ * Object has to be reserved and unreserved outside!
  */
 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		     struct amdgpu_bo_va *bo_va,
@@ -1005,30 +1001,27 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 	/* validate the parameters */
 	if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
-	    size == 0 || size & AMDGPU_GPU_PAGE_MASK) {
-		amdgpu_bo_unreserve(bo_va->bo);
+	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
 		return -EINVAL;
-	}
 
 	/* make sure object fit at this offset */
 	eaddr = saddr + size;
-	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) {
-		amdgpu_bo_unreserve(bo_va->bo);
+	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
 		return -EINVAL;
-	}
 
 	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
 	if (last_pfn > adev->vm_manager.max_pfn) {
 		dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n",
 			last_pfn, adev->vm_manager.max_pfn);
-		amdgpu_bo_unreserve(bo_va->bo);
 		return -EINVAL;
 	}
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
+	spin_lock(&vm->it_lock);
 	it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1);
+	spin_unlock(&vm->it_lock);
 	if (it) {
 		struct amdgpu_bo_va_mapping *tmp;
 		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1036,14 +1029,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
 			"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
 			tmp->it.start, tmp->it.last + 1);
-		amdgpu_bo_unreserve(bo_va->bo);
 		r = -EINVAL;
 		goto error;
 	}
 
 	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
 	if (!mapping) {
-		amdgpu_bo_unreserve(bo_va->bo);
 		r = -ENOMEM;
 		goto error;
 	}
@@ -1055,7 +1046,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	mapping->flags = flags;
 
 	list_add(&mapping->list, &bo_va->invalids);
+	spin_lock(&vm->it_lock);
 	interval_tree_insert(&mapping->it, &vm->va);
+	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	/* Make sure the page tables are allocated */
@@ -1067,8 +1060,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	if (eaddr > vm->max_pde_used)
 		vm->max_pde_used = eaddr;
 
-	amdgpu_bo_unreserve(bo_va->bo);
-
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
 		struct reservation_object *resv = vm->page_directory->tbo.resv;
@@ -1077,13 +1068,11 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		if (vm->page_tables[pt_idx].bo)
 			continue;
 
-		ww_mutex_lock(&resv->lock, NULL);
 		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
 				     AMDGPU_GPU_PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 				     NULL, resv, &pt);
-		ww_mutex_unlock(&resv->lock);
 		if (r)
 			goto error_free;
 
@@ -1101,7 +1090,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1101 1090
1102error_free: 1091error_free:
1103 list_del(&mapping->list); 1092 list_del(&mapping->list);
1093 spin_lock(&vm->it_lock);
1104 interval_tree_remove(&mapping->it, &vm->va); 1094 interval_tree_remove(&mapping->it, &vm->va);
1095 spin_unlock(&vm->it_lock);
1105 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1096 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1106 kfree(mapping); 1097 kfree(mapping);
1107 1098
@@ -1119,7 +1110,7 @@ error:
1119 * Remove a mapping of the BO at the specified addr from the VM. 1110 * Remove a mapping of the BO at the specified addr from the VM.
1120 * Returns 0 for success, error for failure. 1111 * Returns 0 for success, error for failure.
1121 * 1112 *
1122 * Object has to be reserved and gets unreserved by this function! 1113 * Object has to be reserved and unreserved outside!
1123 */ 1114 */
1124int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, 1115int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1125 struct amdgpu_bo_va *bo_va, 1116 struct amdgpu_bo_va *bo_va,
@@ -1144,21 +1135,20 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1144 break; 1135 break;
1145 } 1136 }
1146 1137
1147 if (&mapping->list == &bo_va->invalids) { 1138 if (&mapping->list == &bo_va->invalids)
1148 amdgpu_bo_unreserve(bo_va->bo);
1149 return -ENOENT; 1139 return -ENOENT;
1150 }
1151 } 1140 }
1152 1141
1153 list_del(&mapping->list); 1142 list_del(&mapping->list);
1143 spin_lock(&vm->it_lock);
1154 interval_tree_remove(&mapping->it, &vm->va); 1144 interval_tree_remove(&mapping->it, &vm->va);
1145 spin_unlock(&vm->it_lock);
1155 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1146 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1156 1147
1157 if (valid) 1148 if (valid)
1158 list_add(&mapping->list, &vm->freed); 1149 list_add(&mapping->list, &vm->freed);
1159 else 1150 else
1160 kfree(mapping); 1151 kfree(mapping);
1161 amdgpu_bo_unreserve(bo_va->bo);
1162 1152
1163 return 0; 1153 return 0;
1164} 1154}
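
A minimal sketch of the locking rule the hunks above introduce, assuming the driver's field names (the wrapper function itself is hypothetical): every lookup, insertion and removal on the per-VM address interval tree is now bracketed by the new vm->it_lock spinlock, so the map/unmap paths no longer need to hold the BO reservation across the tree surgery.

#include <linux/spinlock.h>
#include <linux/interval_tree.h>

/* Hypothetical helper condensing the pattern used above; vm->va and
 * vm->it_lock are the driver's fields, the function name is made up. */
static struct interval_tree_node *
amdgpu_vm_it_overlap(struct rb_root *va, spinlock_t *it_lock,
		     unsigned long start, unsigned long last)
{
	struct interval_tree_node *it;

	spin_lock(it_lock);
	it = interval_tree_iter_first(va, start, last);
	spin_unlock(it_lock);

	/* Using the node after dropping it_lock is only safe because
	 * the callers still serialize map/unmap against each other. */
	return it;
}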
@@ -1187,13 +1177,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
1187 1177
1188 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 1178 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
1189 list_del(&mapping->list); 1179 list_del(&mapping->list);
1180 spin_lock(&vm->it_lock);
1190 interval_tree_remove(&mapping->it, &vm->va); 1181 interval_tree_remove(&mapping->it, &vm->va);
1182 spin_unlock(&vm->it_lock);
1191 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1183 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1192 list_add(&mapping->list, &vm->freed); 1184 list_add(&mapping->list, &vm->freed);
1193 } 1185 }
1194 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { 1186 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
1195 list_del(&mapping->list); 1187 list_del(&mapping->list);
1188 spin_lock(&vm->it_lock);
1196 interval_tree_remove(&mapping->it, &vm->va); 1189 interval_tree_remove(&mapping->it, &vm->va);
1190 spin_unlock(&vm->it_lock);
1197 kfree(mapping); 1191 kfree(mapping);
1198 } 1192 }
1199 1193
@@ -1241,7 +1235,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1241 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 1235 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
1242 vm->ids[i].id = 0; 1236 vm->ids[i].id = 0;
1243 vm->ids[i].flushed_updates = NULL; 1237 vm->ids[i].flushed_updates = NULL;
1244 vm->ids[i].last_id_use = NULL;
1245 } 1238 }
1246 mutex_init(&vm->mutex); 1239 mutex_init(&vm->mutex);
1247 vm->va = RB_ROOT; 1240 vm->va = RB_ROOT;
@@ -1249,7 +1242,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1249 INIT_LIST_HEAD(&vm->invalidated); 1242 INIT_LIST_HEAD(&vm->invalidated);
1250 INIT_LIST_HEAD(&vm->cleared); 1243 INIT_LIST_HEAD(&vm->cleared);
1251 INIT_LIST_HEAD(&vm->freed); 1244 INIT_LIST_HEAD(&vm->freed);
1252 1245 spin_lock_init(&vm->it_lock);
1253 pd_size = amdgpu_vm_directory_size(adev); 1246 pd_size = amdgpu_vm_directory_size(adev);
1254 pd_entries = amdgpu_vm_num_pdes(adev); 1247 pd_entries = amdgpu_vm_num_pdes(adev);
1255 1248
@@ -1269,8 +1262,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1269 NULL, NULL, &vm->page_directory); 1262 NULL, NULL, &vm->page_directory);
1270 if (r) 1263 if (r)
1271 return r; 1264 return r;
1272 1265 r = amdgpu_bo_reserve(vm->page_directory, false);
1266 if (r) {
1267 amdgpu_bo_unref(&vm->page_directory);
1268 vm->page_directory = NULL;
1269 return r;
1270 }
1273 r = amdgpu_vm_clear_bo(adev, vm->page_directory); 1271 r = amdgpu_vm_clear_bo(adev, vm->page_directory);
1272 amdgpu_bo_unreserve(vm->page_directory);
1274 if (r) { 1273 if (r) {
1275 amdgpu_bo_unref(&vm->page_directory); 1274 amdgpu_bo_unref(&vm->page_directory);
1276 vm->page_directory = NULL; 1275 vm->page_directory = NULL;
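
The init path now takes the page directory's reservation explicitly around amdgpu_vm_clear_bo(), which expects its BO to already be reserved by the caller. A sketch of the resulting idiom, with a hypothetical wrapper name:

/* Hypothetical wrapper for the reserve/clear/unreserve ordering used
 * above; amdgpu_vm_clear_bo() now expects its BO already reserved. */
static int amdgpu_vm_clear_reserved(struct amdgpu_device *adev,
				    struct amdgpu_bo *bo)
{
	int r;

	r = amdgpu_bo_reserve(bo, false);	/* false: interruptible */
	if (r)
		return r;

	r = amdgpu_vm_clear_bo(adev, bo);
	amdgpu_bo_unreserve(bo);		/* drop resv on both paths */

	return r;
}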
@@ -1313,11 +1312,28 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1313 1312
1314 amdgpu_bo_unref(&vm->page_directory); 1313 amdgpu_bo_unref(&vm->page_directory);
1315 fence_put(vm->page_directory_fence); 1314 fence_put(vm->page_directory_fence);
1316
1317 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 1315 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
1316 unsigned id = vm->ids[i].id;
1317
1318 atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
1319 (long)vm, 0);
1318 fence_put(vm->ids[i].flushed_updates); 1320 fence_put(vm->ids[i].flushed_updates);
1319 fence_put(vm->ids[i].last_id_use);
1320 } 1321 }
1321 1322
1322 mutex_destroy(&vm->mutex); 1323 mutex_destroy(&vm->mutex);
1323} 1324}
1325
1326/**
1327 * amdgpu_vm_manager_fini - cleanup VM manager
1328 *
1329 * @adev: amdgpu_device pointer
1330 *
1331 * Cleanup the VM manager and free resources.
1332 */
1333void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
1334{
1335 unsigned i;
1336
1337 for (i = 0; i < AMDGPU_NUM_VM; ++i)
1338 fence_put(adev->vm_manager.ids[i].active);
1339}
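
Teardown now comes in two halves: each VM releases its per-ring VMIDs only if it still owns them, and the new amdgpu_vm_manager_fini() drops the manager-wide active fences. The release-if-owner idiom from the fini hunk, restated with a comment on why it is a cmpxchg and not a plain store:

/* Clear the owner slot only while it still points at this VM; a VMID
 * that was already handed to another VM must be left untouched, which
 * a blind store could not guarantee. */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
	unsigned id = vm->ids[i].id;

	atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
			    (long)vm, 0);
	fence_put(vm->ids[i].flushed_updates);
}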
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index a1a35a5df8e7..57a2e347f04d 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6569,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
6569 switch (state) { 6569 switch (state) {
6570 case AMDGPU_IRQ_STATE_DISABLE: 6570 case AMDGPU_IRQ_STATE_DISABLE:
6571 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6571 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6572 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; 6572 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
6573 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6573 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6574 break; 6574 break;
6575 case AMDGPU_IRQ_STATE_ENABLE: 6575 case AMDGPU_IRQ_STATE_ENABLE:
6576 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6576 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6577 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; 6577 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
6578 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6578 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6579 break; 6579 break;
6580 default: 6580 default:
@@ -6586,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
6586 switch (state) { 6586 switch (state) {
6587 case AMDGPU_IRQ_STATE_DISABLE: 6587 case AMDGPU_IRQ_STATE_DISABLE:
6588 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6588 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6589 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; 6589 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
6590 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6590 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6591 break; 6591 break;
6592 case AMDGPU_IRQ_STATE_ENABLE: 6592 case AMDGPU_IRQ_STATE_ENABLE:
6593 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6593 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6594 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; 6594 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
6595 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6595 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6596 break; 6596 break;
6597 default: 6597 default:
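
The four ci_dpm hunks above fix an inverted mask polarity: THERM_INTH/INTL_MASK are mask bits, so AMDGPU_IRQ_STATE_DISABLE must set them and AMDGPU_IRQ_STATE_ENABLE must clear them, the opposite of what the old code did. The corrected pattern for the high-to-low interrupt, condensed:

/* Mask-bit semantics: 1 = interrupt masked (disabled). */
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
if (state == AMDGPU_IRQ_STATE_DISABLE)
	cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;	/* mask */
else	/* AMDGPU_IRQ_STATE_ENABLE */
	cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;	/* unmask */
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);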
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6776cf756d40..e1dcab98e249 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -268,7 +268,6 @@ static const u32 fiji_mgcg_cgcg_init[] =
268 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 268 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
269 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 269 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
270 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 270 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
271 mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
272 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 271 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
273 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 272 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
274 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 273 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
@@ -296,10 +295,6 @@ static const u32 fiji_mgcg_cgcg_init[] =
296 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 295 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
297 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 296 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
298 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 297 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
299 mmPCIE_INDEX, 0xffffffff, 0x0140001c,
300 mmPCIE_DATA, 0x000f0000, 0x00000000,
301 mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
302 mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
303 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 298 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
304}; 299};
305 300
@@ -1000,7 +995,7 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1000 adev->gfx.config.max_cu_per_sh = 16; 995 adev->gfx.config.max_cu_per_sh = 16;
1001 adev->gfx.config.max_sh_per_se = 1; 996 adev->gfx.config.max_sh_per_se = 1;
1002 adev->gfx.config.max_backends_per_se = 4; 997 adev->gfx.config.max_backends_per_se = 4;
1003 adev->gfx.config.max_texture_channel_caches = 8; 998 adev->gfx.config.max_texture_channel_caches = 16;
1004 adev->gfx.config.max_gprs = 256; 999 adev->gfx.config.max_gprs = 256;
1005 adev->gfx.config.max_gs_threads = 32; 1000 adev->gfx.config.max_gs_threads = 32;
1006 adev->gfx.config.max_hw_contexts = 8; 1001 adev->gfx.config.max_hw_contexts = 8;
@@ -1613,6 +1608,296 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1613 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); 1608 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1614 } 1609 }
1615 case CHIP_FIJI: 1610 case CHIP_FIJI:
1611 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1612 switch (reg_offset) {
1613 case 0:
1614 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1615 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1616 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1617 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1618 break;
1619 case 1:
1620 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1621 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1622 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1624 break;
1625 case 2:
1626 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1627 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1628 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1629 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1630 break;
1631 case 3:
1632 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1633 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1636 break;
1637 case 4:
1638 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1639 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1640 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1641 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1642 break;
1643 case 5:
1644 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1645 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1648 break;
1649 case 6:
1650 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1651 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1652 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1653 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1654 break;
1655 case 7:
1656 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1657 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1658 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1660 break;
1661 case 8:
1662 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1663 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1664 break;
1665 case 9:
1666 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1667 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1668 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1670 break;
1671 case 10:
1672 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1673 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1674 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1676 break;
1677 case 11:
1678 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1679 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1680 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1682 break;
1683 case 12:
1684 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1685 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1686 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1688 break;
1689 case 13:
1690 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1691 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1692 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1694 break;
1695 case 14:
1696 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1697 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1698 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1700 break;
1701 case 15:
1702 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1703 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1704 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1706 break;
1707 case 16:
1708 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1709 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1710 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1712 break;
1713 case 17:
1714 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1715 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1716 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1718 break;
1719 case 18:
1720 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1721 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1722 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1724 break;
1725 case 19:
1726 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1727 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1728 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1730 break;
1731 case 20:
1732 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1733 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1734 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736 break;
1737 case 21:
1738 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1739 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1740 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1742 break;
1743 case 22:
1744 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1745 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1746 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748 break;
1749 case 23:
1750 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1752 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1754 break;
1755 case 24:
1756 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1757 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1758 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1760 break;
1761 case 25:
1762 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1763 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1764 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1766 break;
1767 case 26:
1768 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1769 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1770 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1771 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1772 break;
1773 case 27:
1774 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1775 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1776 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1777 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1778 break;
1779 case 28:
1780 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1781 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1782 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1783 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1784 break;
1785 case 29:
1786 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1787 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1788 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1789 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1790 break;
1791 case 30:
1792 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1793 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1794 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1795 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1796 break;
1797 default:
1798 gb_tile_moden = 0;
1799 break;
1800 }
1801 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1802 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1803 }
1804 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1805 switch (reg_offset) {
1806 case 0:
1807 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1808 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1809 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1810 NUM_BANKS(ADDR_SURF_8_BANK));
1811 break;
1812 case 1:
1813 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1816 NUM_BANKS(ADDR_SURF_8_BANK));
1817 break;
1818 case 2:
1819 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1822 NUM_BANKS(ADDR_SURF_8_BANK));
1823 break;
1824 case 3:
1825 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1828 NUM_BANKS(ADDR_SURF_8_BANK));
1829 break;
1830 case 4:
1831 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1834 NUM_BANKS(ADDR_SURF_8_BANK));
1835 break;
1836 case 5:
1837 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1840 NUM_BANKS(ADDR_SURF_8_BANK));
1841 break;
1842 case 6:
1843 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1844 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1845 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1846 NUM_BANKS(ADDR_SURF_8_BANK));
1847 break;
1848 case 8:
1849 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1852 NUM_BANKS(ADDR_SURF_8_BANK));
1853 break;
1854 case 9:
1855 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1858 NUM_BANKS(ADDR_SURF_8_BANK));
1859 break;
1860 case 10:
1861 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1864 NUM_BANKS(ADDR_SURF_8_BANK));
1865 break;
1866 case 11:
1867 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1870 NUM_BANKS(ADDR_SURF_8_BANK));
1871 break;
1872 case 12:
1873 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1874 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1875 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1876 NUM_BANKS(ADDR_SURF_8_BANK));
1877 break;
1878 case 13:
1879 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1882 NUM_BANKS(ADDR_SURF_8_BANK));
1883 break;
1884 case 14:
1885 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1888 NUM_BANKS(ADDR_SURF_4_BANK));
1889 break;
1890 case 7:
1891 /* unused idx */
1892 continue;
1893 default:
1894 gb_tile_moden = 0;
1895 break;
1896 }
1897 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1898 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1899 }
1900 break;
1616 case CHIP_TONGA: 1901 case CHIP_TONGA:
1617 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1902 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1618 switch (reg_offset) { 1903 switch (reg_offset) {
@@ -2971,10 +3256,13 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2971 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 3256 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2972 switch (adev->asic_type) { 3257 switch (adev->asic_type) {
2973 case CHIP_TONGA: 3258 case CHIP_TONGA:
2974 case CHIP_FIJI:
2975 amdgpu_ring_write(ring, 0x16000012); 3259 amdgpu_ring_write(ring, 0x16000012);
2976 amdgpu_ring_write(ring, 0x0000002A); 3260 amdgpu_ring_write(ring, 0x0000002A);
2977 break; 3261 break;
3262 case CHIP_FIJI:
3263 amdgpu_ring_write(ring, 0x3a00161a);
3264 amdgpu_ring_write(ring, 0x0000002e);
3265 break;
2978 case CHIP_TOPAZ: 3266 case CHIP_TOPAZ:
2979 case CHIP_CARRIZO: 3267 case CHIP_CARRIZO:
2980 amdgpu_ring_write(ring, 0x00000002); 3268 amdgpu_ring_write(ring, 0x00000002);
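
Fiji previously shared Tonga's raster config (and, per the earlier hunk, fell through into Tonga's tiling tables); it now gets a dedicated tile mode table and its own PA_SC_RASTER_CONFIG pair. Purely illustrative, a table-driven restatement of the raster values; the struct is hypothetical and the numbers are copied from the hunk above:

/* Hypothetical lookup table for the per-ASIC magic written in
 * gfx_v8_0_cp_gfx_start(); only the chips changed here are listed. */
struct gfx_v8_raster_cfg {
	u32 raster_config;
	u32 raster_config_1;
};

static const struct gfx_v8_raster_cfg tonga_raster = { 0x16000012, 0x0000002A };
static const struct gfx_v8_raster_cfg fiji_raster  = { 0x3a00161a, 0x0000002e };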
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 85bbcdc73fff..7427d8cd4c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -40,7 +40,7 @@
40static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); 40static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
41static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); 41static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
42 42
43MODULE_FIRMWARE("radeon/boniare_mc.bin"); 43MODULE_FIRMWARE("radeon/bonaire_mc.bin");
44MODULE_FIRMWARE("radeon/hawaii_mc.bin"); 44MODULE_FIRMWARE("radeon/hawaii_mc.bin");
45 45
46/** 46/**
@@ -501,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
501 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); 501 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
502 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); 502 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
503 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); 503 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
504 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
504 WREG32(mmVM_L2_CNTL, tmp); 505 WREG32(mmVM_L2_CNTL, tmp);
505 tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 506 tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
506 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); 507 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
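
Both GMC generations (the matching gmc_v8_0 hunk appears below) gain ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY in VM_L2_CNTL via the usual read-modify-write idiom; judging by the field name, this lets accesses that fall back to the default page go out to system memory. A minimal standalone sketch:

/* Read-modify-write of VM_L2_CNTL with REG_SET_FIELD(), as in the
 * hunk above; the interpretation of the bit is inferred from its name. */
u32 tmp = RREG32(mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
		    ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
WREG32(mmVM_L2_CNTL, tmp);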
@@ -960,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle)
960 961
961static int gmc_v7_0_sw_fini(void *handle) 962static int gmc_v7_0_sw_fini(void *handle)
962{ 963{
963 int i;
964 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 964 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
965 965
966 if (adev->vm_manager.enabled) { 966 if (adev->vm_manager.enabled) {
967 for (i = 0; i < AMDGPU_NUM_VM; ++i) 967 amdgpu_vm_manager_fini(adev);
968 fence_put(adev->vm_manager.active[i]);
969 gmc_v7_0_vm_fini(adev); 968 gmc_v7_0_vm_fini(adev);
970 adev->vm_manager.enabled = false; 969 adev->vm_manager.enabled = false;
971 } 970 }
@@ -1010,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle)
1010 1009
1011static int gmc_v7_0_suspend(void *handle) 1010static int gmc_v7_0_suspend(void *handle)
1012{ 1011{
1013 int i;
1014 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1012 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1015 1013
1016 if (adev->vm_manager.enabled) { 1014 if (adev->vm_manager.enabled) {
1017 for (i = 0; i < AMDGPU_NUM_VM; ++i) 1015 amdgpu_vm_manager_fini(adev);
1018 fence_put(adev->vm_manager.active[i]);
1019 gmc_v7_0_vm_fini(adev); 1016 gmc_v7_0_vm_fini(adev);
1020 adev->vm_manager.enabled = false; 1017 adev->vm_manager.enabled = false;
1021 } 1018 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1bcc4e74e3b4..cb0e50ebb528 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -629,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
629 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); 629 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
630 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); 630 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
631 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); 631 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
632 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
632 WREG32(mmVM_L2_CNTL, tmp); 633 WREG32(mmVM_L2_CNTL, tmp);
633 tmp = RREG32(mmVM_L2_CNTL2); 634 tmp = RREG32(mmVM_L2_CNTL2);
634 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 635 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
@@ -979,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle)
979 980
980static int gmc_v8_0_sw_fini(void *handle) 981static int gmc_v8_0_sw_fini(void *handle)
981{ 982{
982 int i;
983 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 983 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
984 984
985 if (adev->vm_manager.enabled) { 985 if (adev->vm_manager.enabled) {
986 for (i = 0; i < AMDGPU_NUM_VM; ++i) 986 amdgpu_vm_manager_fini(adev);
987 fence_put(adev->vm_manager.active[i]);
988 gmc_v8_0_vm_fini(adev); 987 gmc_v8_0_vm_fini(adev);
989 adev->vm_manager.enabled = false; 988 adev->vm_manager.enabled = false;
990 } 989 }
@@ -1031,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle)
1031 1030
1032static int gmc_v8_0_suspend(void *handle) 1031static int gmc_v8_0_suspend(void *handle)
1033{ 1032{
1034 int i;
1035 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1033 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1036 1034
1037 if (adev->vm_manager.enabled) { 1035 if (adev->vm_manager.enabled) {
1038 for (i = 0; i < AMDGPU_NUM_VM; ++i) 1036 amdgpu_vm_manager_fini(adev);
1039 fence_put(adev->vm_manager.active[i]);
1040 gmc_v8_0_vm_fini(adev); 1037 gmc_v8_0_vm_fini(adev);
1041 adev->vm_manager.enabled = false; 1038 adev->vm_manager.enabled = false;
1042 } 1039 }
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index 144f50acc971..c89dc777768f 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -16,6 +16,8 @@ TRACE_EVENT(amd_sched_job,
16 TP_ARGS(sched_job), 16 TP_ARGS(sched_job),
17 TP_STRUCT__entry( 17 TP_STRUCT__entry(
18 __field(struct amd_sched_entity *, entity) 18 __field(struct amd_sched_entity *, entity)
19 __field(struct amd_sched_job *, sched_job)
20 __field(struct fence *, fence)
19 __field(const char *, name) 21 __field(const char *, name)
20 __field(u32, job_count) 22 __field(u32, job_count)
21 __field(int, hw_job_count) 23 __field(int, hw_job_count)
@@ -23,16 +25,32 @@ TRACE_EVENT(amd_sched_job,
23 25
24 TP_fast_assign( 26 TP_fast_assign(
25 __entry->entity = sched_job->s_entity; 27 __entry->entity = sched_job->s_entity;
28 __entry->sched_job = sched_job;
29 __entry->fence = &sched_job->s_fence->base;
26 __entry->name = sched_job->sched->name; 30 __entry->name = sched_job->sched->name;
27 __entry->job_count = kfifo_len( 31 __entry->job_count = kfifo_len(
28 &sched_job->s_entity->job_queue) / sizeof(sched_job); 32 &sched_job->s_entity->job_queue) / sizeof(sched_job);
29 __entry->hw_job_count = atomic_read( 33 __entry->hw_job_count = atomic_read(
30 &sched_job->sched->hw_rq_count); 34 &sched_job->sched->hw_rq_count);
31 ), 35 ),
32 TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d", 36 TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d",
33 __entry->entity, __entry->name, __entry->job_count, 37 __entry->entity, __entry->sched_job, __entry->fence, __entry->name,
34 __entry->hw_job_count) 38 __entry->job_count, __entry->hw_job_count)
35); 39);
40
41TRACE_EVENT(amd_sched_process_job,
42 TP_PROTO(struct amd_sched_fence *fence),
43 TP_ARGS(fence),
44 TP_STRUCT__entry(
45 __field(struct fence *, fence)
46 ),
47
48 TP_fast_assign(
49 __entry->fence = &fence->base;
50 ),
51 TP_printk("fence=%p signaled", __entry->fence)
52);
53
36#endif 54#endif
37 55
38/* This part must be outside protection */ 56/* This part must be outside protection */
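
With the job and fence pointers recorded in amd_sched_job and the new amd_sched_process_job event firing on completion, a submission can now be correlated with its fence signaling from a trace. A usage sketch, assuming the header's TRACE_SYSTEM is gpu_sched and tracefs sits at the conventional path:

/*
 * echo 1 > /sys/kernel/debug/tracing/events/gpu_sched/amd_sched_job/enable
 * echo 1 > /sys/kernel/debug/tracing/events/gpu_sched/amd_sched_process_job/enable
 * cat /sys/kernel/debug/tracing/trace_pipe
 */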
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 89619a5a4289..ea30d6ad4c13 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -34,6 +34,9 @@ static struct amd_sched_job *
34amd_sched_entity_pop_job(struct amd_sched_entity *entity); 34amd_sched_entity_pop_job(struct amd_sched_entity *entity);
35static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); 35static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
36 36
37struct kmem_cache *sched_fence_slab;
38atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
39
37/* Initialize a given run queue struct */ 40/* Initialize a given run queue struct */
38static void amd_sched_rq_init(struct amd_sched_rq *rq) 41static void amd_sched_rq_init(struct amd_sched_rq *rq)
39{ 42{
@@ -273,22 +276,13 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
273 * 276 *
274 * Returns 0 for success, negative error code otherwise. 277 * Returns 0 for success, negative error code otherwise.
275 */ 278 */
276int amd_sched_entity_push_job(struct amd_sched_job *sched_job) 279void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
277{ 280{
278 struct amd_sched_entity *entity = sched_job->s_entity; 281 struct amd_sched_entity *entity = sched_job->s_entity;
279 struct amd_sched_fence *fence = amd_sched_fence_create(
280 entity, sched_job->owner);
281
282 if (!fence)
283 return -ENOMEM;
284
285 fence_get(&fence->base);
286 sched_job->s_fence = fence;
287 282
288 wait_event(entity->sched->job_scheduled, 283 wait_event(entity->sched->job_scheduled,
289 amd_sched_entity_in(sched_job)); 284 amd_sched_entity_in(sched_job));
290 trace_amd_sched_job(sched_job); 285 trace_amd_sched_job(sched_job);
291 return 0;
292} 286}
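
Fence creation moves out of amd_sched_entity_push_job(), which becomes void and can no longer fail; the allocation error now has to be handled by the submitter before anything is queued (note the kernel-doc line above, still promising a return code, is now stale). A sketch of the caller side implied by the removed lines:

/* Caller-side pattern after this change: allocate and attach the
 * scheduler fence first, keep a reference for the submitter, then push. */
struct amd_sched_fence *fence =
	amd_sched_fence_create(sched_job->s_entity, owner);

if (!fence)
	return -ENOMEM;		/* handled before the job is queued */
fence_get(&fence->base);	/* submitter keeps its own reference */
sched_job->s_fence = fence;
amd_sched_entity_push_job(sched_job);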
293 287
294/** 288/**
@@ -343,6 +337,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
343 list_del_init(&s_fence->list); 337 list_del_init(&s_fence->list);
344 spin_unlock_irqrestore(&sched->fence_list_lock, flags); 338 spin_unlock_irqrestore(&sched->fence_list_lock, flags);
345 } 339 }
340 trace_amd_sched_process_job(s_fence);
346 fence_put(&s_fence->base); 341 fence_put(&s_fence->base);
347 wake_up_interruptible(&sched->wake_up_worker); 342 wake_up_interruptible(&sched->wake_up_worker);
348} 343}
@@ -450,6 +445,13 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
450 init_waitqueue_head(&sched->wake_up_worker); 445 init_waitqueue_head(&sched->wake_up_worker);
451 init_waitqueue_head(&sched->job_scheduled); 446 init_waitqueue_head(&sched->job_scheduled);
452 atomic_set(&sched->hw_rq_count, 0); 447 atomic_set(&sched->hw_rq_count, 0);
448 if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
449 sched_fence_slab = kmem_cache_create(
450 "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
451 SLAB_HWCACHE_ALIGN, NULL);
452 if (!sched_fence_slab)
453 return -ENOMEM;
454 }
453 455
454 /* Each scheduler will run on a separate kernel thread */ 456 /* Each scheduler will run on a separate kernel thread */
455 sched->thread = kthread_run(amd_sched_main, sched, sched->name); 457 sched->thread = kthread_run(amd_sched_main, sched, sched->name);
@@ -470,4 +472,6 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched)
470{ 472{
471 if (sched->thread) 473 if (sched->thread)
472 kthread_stop(sched->thread); 474 kthread_stop(sched->thread);
475 if (atomic_dec_and_test(&sched_fence_slab_ref))
476 kmem_cache_destroy(sched_fence_slab);
473} 477}
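
Scheduler fences now come from a shared kmem cache whose lifetime follows the number of live schedulers: the first amd_sched_init() creates it, the last amd_sched_fini() destroys it. Hypothetical helpers making that pairing explicit (the patch open-codes both sides):

static int amd_sched_fence_slab_init(void)
{
	/* Only the first scheduler actually creates the cache. */
	if (atomic_inc_return(&sched_fence_slab_ref) != 1)
		return 0;

	sched_fence_slab = kmem_cache_create("amd_sched_fence",
			sizeof(struct amd_sched_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
	return sched_fence_slab ? 0 : -ENOMEM;
}

static void amd_sched_fence_slab_fini(void)
{
	/* The last scheduler to go tears the cache down again. */
	if (atomic_dec_and_test(&sched_fence_slab_ref))
		kmem_cache_destroy(sched_fence_slab);
}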
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 929e9aced041..939692b14f4b 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -30,6 +30,9 @@
30struct amd_gpu_scheduler; 30struct amd_gpu_scheduler;
31struct amd_sched_rq; 31struct amd_sched_rq;
32 32
33extern struct kmem_cache *sched_fence_slab;
34extern atomic_t sched_fence_slab_ref;
35
33/** 36/**
34 * A scheduler entity is a wrapper around a job queue or a group 37 * A scheduler entity is a wrapper around a job queue or a group
35 * of other entities. Entities take turns emitting jobs from their 38 * of other entities. Entities take turns emitting jobs from their
@@ -76,7 +79,6 @@ struct amd_sched_job {
76 struct amd_gpu_scheduler *sched; 79 struct amd_gpu_scheduler *sched;
77 struct amd_sched_entity *s_entity; 80 struct amd_sched_entity *s_entity;
78 struct amd_sched_fence *s_fence; 81 struct amd_sched_fence *s_fence;
79 void *owner;
80}; 82};
81 83
82extern const struct fence_ops amd_sched_fence_ops; 84extern const struct fence_ops amd_sched_fence_ops;
@@ -128,7 +130,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
128 uint32_t jobs); 130 uint32_t jobs);
129void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, 131void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
130 struct amd_sched_entity *entity); 132 struct amd_sched_entity *entity);
131int amd_sched_entity_push_job(struct amd_sched_job *sched_job); 133void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
132 134
133struct amd_sched_fence *amd_sched_fence_create( 135struct amd_sched_fence *amd_sched_fence_create(
134 struct amd_sched_entity *s_entity, void *owner); 136 struct amd_sched_entity *s_entity, void *owner);
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index d802638094f4..8d2130b9ff05 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -32,7 +32,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
32 struct amd_sched_fence *fence = NULL; 32 struct amd_sched_fence *fence = NULL;
33 unsigned seq; 33 unsigned seq;
34 34
35 fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); 35 fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
36 if (fence == NULL) 36 if (fence == NULL)
37 return NULL; 37 return NULL;
38 fence->owner = owner; 38 fence->owner = owner;
@@ -71,11 +71,17 @@ static bool amd_sched_fence_enable_signaling(struct fence *f)
71 return true; 71 return true;
72} 72}
73 73
74static void amd_sched_fence_release(struct fence *f)
75{
76 struct amd_sched_fence *fence = to_amd_sched_fence(f);
77 kmem_cache_free(sched_fence_slab, fence);
78}
79
74const struct fence_ops amd_sched_fence_ops = { 80const struct fence_ops amd_sched_fence_ops = {
75 .get_driver_name = amd_sched_fence_get_driver_name, 81 .get_driver_name = amd_sched_fence_get_driver_name,
76 .get_timeline_name = amd_sched_fence_get_timeline_name, 82 .get_timeline_name = amd_sched_fence_get_timeline_name,
77 .enable_signaling = amd_sched_fence_enable_signaling, 83 .enable_signaling = amd_sched_fence_enable_signaling,
78 .signaled = NULL, 84 .signaled = NULL,
79 .wait = fence_default_wait, 85 .wait = fence_default_wait,
80 .release = NULL, 86 .release = amd_sched_fence_release,
81}; 87};
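
The last hunk closes the loop on the slab conversion: with fences allocated by kmem_cache_zalloc(), the old .release = NULL default (which falls back to freeing the whole object in the fence core) would return memory to the wrong allocator, so an explicit release callback hands it back to the cache.

/* Alloc and free must target the same cache; letting the fence core's
 * default free path handle a slab-cache object would corrupt the slab. */
static void amd_sched_fence_release(struct fence *f)
{
	struct amd_sched_fence *fence = to_amd_sched_fence(f);

	kmem_cache_free(sched_fence_slab, fence);
}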