Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h            | 120
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c         | 177
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c      | 101
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c        |  24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c         |   4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h     |   7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c         |   3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c      |  30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c  |   2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c       |  10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h      |  94
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c        |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c         | 138
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c            |   8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c          | 302
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c          |  11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c          |   9
17 files changed, 669 insertions(+), 377 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 615ce6d464fb..306f75700bf8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -389,7 +389,6 @@ struct amdgpu_clock {
389 | * Fences. | 389 | * Fences. |
390 | */ | 390 | */ |
391 | struct amdgpu_fence_driver { | 391 | struct amdgpu_fence_driver { |
392 | struct amdgpu_ring *ring; | ||
393 | uint64_t gpu_addr; | 392 | uint64_t gpu_addr; |
394 | volatile uint32_t *cpu_addr; | 393 | volatile uint32_t *cpu_addr; |
395 | /* sync_seq is protected by ring emission lock */ | 394 | /* sync_seq is protected by ring emission lock */ |
@@ -398,7 +397,7 @@ struct amdgpu_fence_driver {
398 | bool initialized; | 397 | bool initialized; |
399 | struct amdgpu_irq_src *irq_src; | 398 | struct amdgpu_irq_src *irq_src; |
400 | unsigned irq_type; | 399 | unsigned irq_type; |
401 | struct delayed_work lockup_work; | 400 | struct timer_list fallback_timer; |
402 | wait_queue_head_t fence_queue; | 401 | wait_queue_head_t fence_queue; |
403 | }; | 402 | }; |
404 | 403 | ||
@@ -917,8 +916,8 @@ struct amdgpu_ring {
917 | #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 | 916 | #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 |
918 | 917 | ||
919 | struct amdgpu_vm_pt { | 918 | struct amdgpu_vm_pt { |
920 | struct amdgpu_bo *bo; | 919 | struct amdgpu_bo *bo; |
921 | uint64_t addr; | 920 | uint64_t addr; |
922 | }; | 921 | }; |
923 | 922 | ||
924 | struct amdgpu_vm_id { | 923 | struct amdgpu_vm_id { |
@@ -926,8 +925,6 @@ struct amdgpu_vm_id {
926 | uint64_t pd_gpu_addr; | 925 | uint64_t pd_gpu_addr; |
927 | /* last flushed PD/PT update */ | 926 | /* last flushed PD/PT update */ |
928 | struct fence *flushed_updates; | 927 | struct fence *flushed_updates; |
929 | /* last use of vmid */ | ||
930 | struct fence *last_id_use; | ||
931 | }; | 928 | }; |
932 | 929 | ||
933 | struct amdgpu_vm { | 930 | struct amdgpu_vm { |
@@ -957,24 +954,70 @@ struct amdgpu_vm {
957 | 954 | ||
958 | /* for id and flush management per ring */ | 955 | /* for id and flush management per ring */ |
959 | struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; | 956 | struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; |
957 | /* for interval tree */ | ||
958 | spinlock_t it_lock; | ||
960 | }; | 959 | }; |
961 | 960 | ||
962 | struct amdgpu_vm_manager { | 961 | struct amdgpu_vm_manager { |
963 | struct fence *active[AMDGPU_NUM_VM]; | 962 | struct { |
964 | uint32_t max_pfn; | 963 | struct fence *active; |
964 | atomic_long_t owner; | ||
965 | } ids[AMDGPU_NUM_VM]; | ||
966 | |||
967 | uint32_t max_pfn; | ||
965 | /* number of VMIDs */ | 968 | /* number of VMIDs */ |
966 | unsigned nvm; | 969 | unsigned nvm; |
967 | /* vram base address for page table entry */ | 970 | /* vram base address for page table entry */ |
968 | u64 vram_base_offset; | 971 | u64 vram_base_offset; |
969 | /* is vm enabled? */ | 972 | /* is vm enabled? */ |
970 | bool enabled; | 973 | bool enabled; |
971 | /* for hw to save the PD addr on suspend/resume */ | ||
972 | uint32_t saved_table_addr[AMDGPU_NUM_VM]; | ||
973 | /* vm pte handling */ | 974 | /* vm pte handling */ |
974 | const struct amdgpu_vm_pte_funcs *vm_pte_funcs; | 975 | const struct amdgpu_vm_pte_funcs *vm_pte_funcs; |
975 | struct amdgpu_ring *vm_pte_funcs_ring; | 976 | struct amdgpu_ring *vm_pte_funcs_ring; |
976 | }; | 977 | }; |
977 | 978 | ||
979 | void amdgpu_vm_manager_fini(struct amdgpu_device *adev); | ||
980 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); | ||
981 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); | ||
982 | struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev, | ||
983 | struct amdgpu_vm *vm, | ||
984 | struct list_head *head); | ||
985 | int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | ||
986 | struct amdgpu_sync *sync); | ||
987 | void amdgpu_vm_flush(struct amdgpu_ring *ring, | ||
988 | struct amdgpu_vm *vm, | ||
989 | struct fence *updates); | ||
990 | void amdgpu_vm_fence(struct amdgpu_device *adev, | ||
991 | struct amdgpu_vm *vm, | ||
992 | struct fence *fence); | ||
993 | uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr); | ||
994 | int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | ||
995 | struct amdgpu_vm *vm); | ||
996 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | ||
997 | struct amdgpu_vm *vm); | ||
998 | int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, | ||
999 | struct amdgpu_sync *sync); | ||
1000 | int amdgpu_vm_bo_update(struct amdgpu_device *adev, | ||
1001 | struct amdgpu_bo_va *bo_va, | ||
1002 | struct ttm_mem_reg *mem); | ||
1003 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | ||
1004 | struct amdgpu_bo *bo); | ||
1005 | struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, | ||
1006 | struct amdgpu_bo *bo); | ||
1007 | struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | ||
1008 | struct amdgpu_vm *vm, | ||
1009 | struct amdgpu_bo *bo); | ||
1010 | int amdgpu_vm_bo_map(struct amdgpu_device *adev, | ||
1011 | struct amdgpu_bo_va *bo_va, | ||
1012 | uint64_t addr, uint64_t offset, | ||
1013 | uint64_t size, uint32_t flags); | ||
1014 | int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | ||
1015 | struct amdgpu_bo_va *bo_va, | ||
1016 | uint64_t addr); | ||
1017 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | ||
1018 | struct amdgpu_bo_va *bo_va); | ||
1019 | int amdgpu_vm_free_job(struct amdgpu_job *job); | ||
1020 | |||
978 | /* | 1021 | /* |
979 | * context related structures | 1022 | * context related structures |
980 | */ | 1023 | */ |
@@ -1211,6 +1254,7 @@ struct amdgpu_cs_parser {
1211 | /* relocations */ | 1254 | /* relocations */ |
1212 | struct amdgpu_bo_list_entry *vm_bos; | 1255 | struct amdgpu_bo_list_entry *vm_bos; |
1213 | struct list_head validated; | 1256 | struct list_head validated; |
1257 | struct fence *fence; | ||
1214 | 1258 | ||
1215 | struct amdgpu_ib *ibs; | 1259 | struct amdgpu_ib *ibs; |
1216 | uint32_t num_ibs; | 1260 | uint32_t num_ibs; |
@@ -1226,7 +1270,7 @@ struct amdgpu_job {
1226 | struct amdgpu_device *adev; | 1270 | struct amdgpu_device *adev; |
1227 | struct amdgpu_ib *ibs; | 1271 | struct amdgpu_ib *ibs; |
1228 | uint32_t num_ibs; | 1272 | uint32_t num_ibs; |
1229 | struct mutex job_lock; | 1273 | void *owner; |
1230 | struct amdgpu_user_fence uf; | 1274 | struct amdgpu_user_fence uf; |
1231 | int (*free_job)(struct amdgpu_job *job); | 1275 | int (*free_job)(struct amdgpu_job *job); |
1232 | }; | 1276 | }; |
@@ -2257,11 +2301,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
2257 | bool amdgpu_card_posted(struct amdgpu_device *adev); | 2301 | bool amdgpu_card_posted(struct amdgpu_device *adev); |
2258 | void amdgpu_update_display_priority(struct amdgpu_device *adev); | 2302 | void amdgpu_update_display_priority(struct amdgpu_device *adev); |
2259 | bool amdgpu_boot_test_post_card(struct amdgpu_device *adev); | 2303 | bool amdgpu_boot_test_post_card(struct amdgpu_device *adev); |
2260 | struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, | ||
2261 | struct drm_file *filp, | ||
2262 | struct amdgpu_ctx *ctx, | ||
2263 | struct amdgpu_ib *ibs, | ||
2264 | uint32_t num_ibs); | ||
2265 | 2304 | ||
2266 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); | 2305 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); |
2267 | int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, | 2306 | int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, |
@@ -2319,49 +2358,6 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
2319 | unsigned long arg); | 2358 | unsigned long arg); |
2320 | 2359 | ||
2321 | /* | 2360 | /* |
2322 | * vm | ||
2323 | */ | ||
2324 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); | ||
2325 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); | ||
2326 | struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev, | ||
2327 | struct amdgpu_vm *vm, | ||
2328 | struct list_head *head); | ||
2329 | int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, | ||
2330 | struct amdgpu_sync *sync); | ||
2331 | void amdgpu_vm_flush(struct amdgpu_ring *ring, | ||
2332 | struct amdgpu_vm *vm, | ||
2333 | struct fence *updates); | ||
2334 | void amdgpu_vm_fence(struct amdgpu_device *adev, | ||
2335 | struct amdgpu_vm *vm, | ||
2336 | struct amdgpu_fence *fence); | ||
2337 | uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr); | ||
2338 | int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | ||
2339 | struct amdgpu_vm *vm); | ||
2340 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | ||
2341 | struct amdgpu_vm *vm); | ||
2342 | int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, | ||
2343 | struct amdgpu_vm *vm, struct amdgpu_sync *sync); | ||
2344 | int amdgpu_vm_bo_update(struct amdgpu_device *adev, | ||
2345 | struct amdgpu_bo_va *bo_va, | ||
2346 | struct ttm_mem_reg *mem); | ||
2347 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | ||
2348 | struct amdgpu_bo *bo); | ||
2349 | struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, | ||
2350 | struct amdgpu_bo *bo); | ||
2351 | struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | ||
2352 | struct amdgpu_vm *vm, | ||
2353 | struct amdgpu_bo *bo); | ||
2354 | int amdgpu_vm_bo_map(struct amdgpu_device *adev, | ||
2355 | struct amdgpu_bo_va *bo_va, | ||
2356 | uint64_t addr, uint64_t offset, | ||
2357 | uint64_t size, uint32_t flags); | ||
2358 | int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | ||
2359 | struct amdgpu_bo_va *bo_va, | ||
2360 | uint64_t addr); | ||
2361 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | ||
2362 | struct amdgpu_bo_va *bo_va); | ||
2363 | int amdgpu_vm_free_job(struct amdgpu_job *job); | ||
2364 | /* | ||
2365 | * functions used by amdgpu_encoder.c | 2361 | * functions used by amdgpu_encoder.c |
2366 | */ | 2362 | */ |
2367 | struct amdgpu_afmt_acr { | 2363 | struct amdgpu_afmt_acr { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dfc4d02c7a38..3afcf0237c25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -127,30 +127,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
127 | return 0; | 127 | return 0; |
128 | } | 128 | } |
129 | 129 | ||
130 | struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, | ||
131 | struct drm_file *filp, | ||
132 | struct amdgpu_ctx *ctx, | ||
133 | struct amdgpu_ib *ibs, | ||
134 | uint32_t num_ibs) | ||
135 | { | ||
136 | struct amdgpu_cs_parser *parser; | ||
137 | int i; | ||
138 | |||
139 | parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL); | ||
140 | if (!parser) | ||
141 | return NULL; | ||
142 | |||
143 | parser->adev = adev; | ||
144 | parser->filp = filp; | ||
145 | parser->ctx = ctx; | ||
146 | parser->ibs = ibs; | ||
147 | parser->num_ibs = num_ibs; | ||
148 | for (i = 0; i < num_ibs; i++) | ||
149 | ibs[i].ctx = ctx; | ||
150 | |||
151 | return parser; | ||
152 | } | ||
153 | |||
154 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | 130 | int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) |
155 | { | 131 | { |
156 | union drm_amdgpu_cs *cs = data; | 132 | union drm_amdgpu_cs *cs = data; |
@@ -463,8 +439,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
463 | return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; | 439 | return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; |
464 | } | 440 | } |
465 | 441 | ||
466 | static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff) | 442 | /** |
443 | * cs_parser_fini() - clean parser states | ||
444 | * @parser: parser structure holding parsing context. | ||
445 | * @error: error number | ||
446 | * | ||
447 | * If error is set than unvalidate buffer, otherwise just free memory | ||
448 | * used by parsing context. | ||
449 | **/ | ||
450 | static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) | ||
467 | { | 451 | { |
452 | unsigned i; | ||
453 | |||
468 | if (!error) { | 454 | if (!error) { |
469 | /* Sort the buffer list from the smallest to largest buffer, | 455 | /* Sort the buffer list from the smallest to largest buffer, |
470 | * which affects the order of buffers in the LRU list. | 456 | * which affects the order of buffers in the LRU list. |
@@ -479,17 +465,14 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err
479 | list_sort(NULL, &parser->validated, cmp_size_smaller_first); | 465 | list_sort(NULL, &parser->validated, cmp_size_smaller_first); |
480 | 466 | ||
481 | ttm_eu_fence_buffer_objects(&parser->ticket, | 467 | ttm_eu_fence_buffer_objects(&parser->ticket, |
482 | &parser->validated, | 468 | &parser->validated, |
483 | &parser->ibs[parser->num_ibs-1].fence->base); | 469 | parser->fence); |
484 | } else if (backoff) { | 470 | } else if (backoff) { |
485 | ttm_eu_backoff_reservation(&parser->ticket, | 471 | ttm_eu_backoff_reservation(&parser->ticket, |
486 | &parser->validated); | 472 | &parser->validated); |
487 | } | 473 | } |
488 | } | 474 | fence_put(parser->fence); |
489 | 475 | ||
490 | static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) | ||
491 | { | ||
492 | unsigned i; | ||
493 | if (parser->ctx) | 476 | if (parser->ctx) |
494 | amdgpu_ctx_put(parser->ctx); | 477 | amdgpu_ctx_put(parser->ctx); |
495 | if (parser->bo_list) | 478 | if (parser->bo_list) |
@@ -499,31 +482,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
499 | for (i = 0; i < parser->nchunks; i++) | 482 | for (i = 0; i < parser->nchunks; i++) |
500 | drm_free_large(parser->chunks[i].kdata); | 483 | drm_free_large(parser->chunks[i].kdata); |
501 | kfree(parser->chunks); | 484 | kfree(parser->chunks); |
502 | if (!amdgpu_enable_scheduler) | 485 | if (parser->ibs) |
503 | { | 486 | for (i = 0; i < parser->num_ibs; i++) |
504 | if (parser->ibs) | 487 | amdgpu_ib_free(parser->adev, &parser->ibs[i]); |
505 | for (i = 0; i < parser->num_ibs; i++) | 488 | kfree(parser->ibs); |
506 | amdgpu_ib_free(parser->adev, &parser->ibs[i]); | 489 | if (parser->uf.bo) |
507 | kfree(parser->ibs); | 490 | drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); |
508 | if (parser->uf.bo) | ||
509 | drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); | ||
510 | } | ||
511 | |||
512 | kfree(parser); | ||
513 | } | ||
514 | |||
515 | /** | ||
516 | * cs_parser_fini() - clean parser states | ||
517 | * @parser: parser structure holding parsing context. | ||
518 | * @error: error number | ||
519 | * | ||
520 | * If error is set than unvalidate buffer, otherwise just free memory | ||
521 | * used by parsing context. | ||
522 | **/ | ||
523 | static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) | ||
524 | { | ||
525 | amdgpu_cs_parser_fini_early(parser, error, backoff); | ||
526 | amdgpu_cs_parser_fini_late(parser); | ||
527 | } | 491 | } |
528 | 492 | ||
529 | static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, | 493 | static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, |
@@ -610,15 +574,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
610 | } | 574 | } |
611 | 575 | ||
612 | r = amdgpu_bo_vm_update_pte(parser, vm); | 576 | r = amdgpu_bo_vm_update_pte(parser, vm); |
613 | if (r) { | 577 | if (!r) |
614 | goto out; | 578 | amdgpu_cs_sync_rings(parser); |
615 | } | ||
616 | amdgpu_cs_sync_rings(parser); | ||
617 | if (!amdgpu_enable_scheduler) | ||
618 | r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs, | ||
619 | parser->filp); | ||
620 | 579 | ||
621 | out: | ||
622 | return r; | 580 | return r; |
623 | } | 581 | } |
624 | 582 | ||
@@ -828,36 +786,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
828 | union drm_amdgpu_cs *cs = data; | 786 | union drm_amdgpu_cs *cs = data; |
829 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | 787 | struct amdgpu_fpriv *fpriv = filp->driver_priv; |
830 | struct amdgpu_vm *vm = &fpriv->vm; | 788 | struct amdgpu_vm *vm = &fpriv->vm; |
831 | struct amdgpu_cs_parser *parser; | 789 | struct amdgpu_cs_parser parser = {}; |
832 | bool reserved_buffers = false; | 790 | bool reserved_buffers = false; |
833 | int i, r; | 791 | int i, r; |
834 | 792 | ||
835 | if (!adev->accel_working) | 793 | if (!adev->accel_working) |
836 | return -EBUSY; | 794 | return -EBUSY; |
837 | 795 | ||
838 | parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0); | 796 | parser.adev = adev; |
839 | if (!parser) | 797 | parser.filp = filp; |
840 | return -ENOMEM; | 798 | |
841 | r = amdgpu_cs_parser_init(parser, data); | 799 | r = amdgpu_cs_parser_init(&parser, data); |
842 | if (r) { | 800 | if (r) { |
843 | DRM_ERROR("Failed to initialize parser !\n"); | 801 | DRM_ERROR("Failed to initialize parser !\n"); |
844 | amdgpu_cs_parser_fini(parser, r, false); | 802 | amdgpu_cs_parser_fini(&parser, r, false); |
845 | r = amdgpu_cs_handle_lockup(adev, r); | 803 | r = amdgpu_cs_handle_lockup(adev, r); |
846 | return r; | 804 | return r; |
847 | } | 805 | } |
848 | mutex_lock(&vm->mutex); | 806 | mutex_lock(&vm->mutex); |
849 | r = amdgpu_cs_parser_relocs(parser); | 807 | r = amdgpu_cs_parser_relocs(&parser); |
850 | if (r == -ENOMEM) | 808 | if (r == -ENOMEM) |
851 | DRM_ERROR("Not enough memory for command submission!\n"); | 809 | DRM_ERROR("Not enough memory for command submission!\n"); |
852 | else if (r && r != -ERESTARTSYS) | 810 | else if (r && r != -ERESTARTSYS) |
853 | DRM_ERROR("Failed to process the buffer list %d!\n", r); | 811 | DRM_ERROR("Failed to process the buffer list %d!\n", r); |
854 | else if (!r) { | 812 | else if (!r) { |
855 | reserved_buffers = true; | 813 | reserved_buffers = true; |
856 | r = amdgpu_cs_ib_fill(adev, parser); | 814 | r = amdgpu_cs_ib_fill(adev, &parser); |
857 | } | 815 | } |
858 | 816 | ||
859 | if (!r) { | 817 | if (!r) { |
860 | r = amdgpu_cs_dependencies(adev, parser); | 818 | r = amdgpu_cs_dependencies(adev, &parser); |
861 | if (r) | 819 | if (r) |
862 | DRM_ERROR("Failed in the dependencies handling %d!\n", r); | 820 | DRM_ERROR("Failed in the dependencies handling %d!\n", r); |
863 | } | 821 | } |
@@ -865,62 +823,71 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
865 | if (r) | 823 | if (r) |
866 | goto out; | 824 | goto out; |
867 | 825 | ||
868 | for (i = 0; i < parser->num_ibs; i++) | 826 | for (i = 0; i < parser.num_ibs; i++) |
869 | trace_amdgpu_cs(parser, i); | 827 | trace_amdgpu_cs(&parser, i); |
870 | 828 | ||
871 | r = amdgpu_cs_ib_vm_chunk(adev, parser); | 829 | r = amdgpu_cs_ib_vm_chunk(adev, &parser); |
872 | if (r) | 830 | if (r) |
873 | goto out; | 831 | goto out; |
874 | 832 | ||
875 | if (amdgpu_enable_scheduler && parser->num_ibs) { | 833 | if (amdgpu_enable_scheduler && parser.num_ibs) { |
834 | struct amdgpu_ring * ring = parser.ibs->ring; | ||
835 | struct amd_sched_fence *fence; | ||
876 | struct amdgpu_job *job; | 836 | struct amdgpu_job *job; |
877 | struct amdgpu_ring * ring = parser->ibs->ring; | 837 | |
878 | job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); | 838 | job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); |
879 | if (!job) { | 839 | if (!job) { |
880 | r = -ENOMEM; | 840 | r = -ENOMEM; |
881 | goto out; | 841 | goto out; |
882 | } | 842 | } |
843 | |||
883 | job->base.sched = &ring->sched; | 844 | job->base.sched = &ring->sched; |
884 | job->base.s_entity = &parser->ctx->rings[ring->idx].entity; | 845 | job->base.s_entity = &parser.ctx->rings[ring->idx].entity; |
885 | job->adev = parser->adev; | 846 | job->adev = parser.adev; |
886 | job->ibs = parser->ibs; | 847 | job->owner = parser.filp; |
887 | job->num_ibs = parser->num_ibs; | 848 | job->free_job = amdgpu_cs_free_job; |
888 | job->base.owner = parser->filp; | 849 | |
889 | mutex_init(&job->job_lock); | 850 | job->ibs = parser.ibs; |
851 | job->num_ibs = parser.num_ibs; | ||
852 | parser.ibs = NULL; | ||
853 | parser.num_ibs = 0; | ||
854 | |||
890 | if (job->ibs[job->num_ibs - 1].user) { | 855 | if (job->ibs[job->num_ibs - 1].user) { |
891 | memcpy(&job->uf, &parser->uf, | 856 | job->uf = parser.uf; |
892 | sizeof(struct amdgpu_user_fence)); | ||
893 | job->ibs[job->num_ibs - 1].user = &job->uf; | 857 | job->ibs[job->num_ibs - 1].user = &job->uf; |
858 | parser.uf.bo = NULL; | ||
894 | } | 859 | } |
895 | 860 | ||
896 | job->free_job = amdgpu_cs_free_job; | 861 | fence = amd_sched_fence_create(job->base.s_entity, |
897 | mutex_lock(&job->job_lock); | 862 | parser.filp); |
898 | r = amd_sched_entity_push_job(&job->base); | 863 | if (!fence) { |
899 | if (r) { | 864 | r = -ENOMEM; |
900 | mutex_unlock(&job->job_lock); | ||
901 | amdgpu_cs_free_job(job); | 865 | amdgpu_cs_free_job(job); |
902 | kfree(job); | 866 | kfree(job); |
903 | goto out; | 867 | goto out; |
904 | } | 868 | } |
905 | cs->out.handle = | 869 | job->base.s_fence = fence; |
906 | amdgpu_ctx_add_fence(parser->ctx, ring, | 870 | parser.fence = fence_get(&fence->base); |
907 | &job->base.s_fence->base); | ||
908 | parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle; | ||
909 | 871 | ||
910 | list_sort(NULL, &parser->validated, cmp_size_smaller_first); | 872 | cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring, |
911 | ttm_eu_fence_buffer_objects(&parser->ticket, | 873 | &fence->base); |
912 | &parser->validated, | 874 | job->ibs[job->num_ibs - 1].sequence = cs->out.handle; |
913 | &job->base.s_fence->base); | ||
914 | 875 | ||
915 | mutex_unlock(&job->job_lock); | 876 | trace_amdgpu_cs_ioctl(job); |
916 | amdgpu_cs_parser_fini_late(parser); | 877 | amd_sched_entity_push_job(&job->base); |
917 | mutex_unlock(&vm->mutex); | 878 | |
918 | return 0; | 879 | } else { |
880 | struct amdgpu_fence *fence; | ||
881 | |||
882 | r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs, | ||
883 | parser.filp); | ||
884 | fence = parser.ibs[parser.num_ibs - 1].fence; | ||
885 | parser.fence = fence_get(&fence->base); | ||
886 | cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; | ||
919 | } | 887 | } |
920 | 888 | ||
921 | cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; | ||
922 | out: | 889 | out: |
923 | amdgpu_cs_parser_fini(parser, r, reserved_buffers); | 890 | amdgpu_cs_parser_fini(&parser, r, reserved_buffers); |
924 | mutex_unlock(&vm->mutex); | 891 | mutex_unlock(&vm->mutex); |
925 | r = amdgpu_cs_handle_lockup(adev, r); | 892 | r = amdgpu_cs_handle_lockup(adev, r); |
926 | return r; | 893 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 257d72205bb5..3671f9f220bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,6 +47,9 @@
47 | * that the the relevant GPU caches have been flushed. | 47 | * that the the relevant GPU caches have been flushed. |
48 | */ | 48 | */ |
49 | 49 | ||
50 | static struct kmem_cache *amdgpu_fence_slab; | ||
51 | static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0); | ||
52 | |||
50 | /** | 53 | /** |
51 | * amdgpu_fence_write - write a fence value | 54 | * amdgpu_fence_write - write a fence value |
52 | * | 55 | * |
@@ -85,24 +88,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
85 | } | 88 | } |
86 | 89 | ||
87 | /** | 90 | /** |
88 | * amdgpu_fence_schedule_check - schedule lockup check | ||
89 | * | ||
90 | * @ring: pointer to struct amdgpu_ring | ||
91 | * | ||
92 | * Queues a delayed work item to check for lockups. | ||
93 | */ | ||
94 | static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring) | ||
95 | { | ||
96 | /* | ||
97 | * Do not reset the timer here with mod_delayed_work, | ||
98 | * this can livelock in an interaction with TTM delayed destroy. | ||
99 | */ | ||
100 | queue_delayed_work(system_power_efficient_wq, | ||
101 | &ring->fence_drv.lockup_work, | ||
102 | AMDGPU_FENCE_JIFFIES_TIMEOUT); | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * amdgpu_fence_emit - emit a fence on the requested ring | 91 | * amdgpu_fence_emit - emit a fence on the requested ring |
107 | * | 92 | * |
108 | * @ring: ring the fence is associated with | 93 | * @ring: ring the fence is associated with |
@@ -118,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
118 | struct amdgpu_device *adev = ring->adev; | 103 | struct amdgpu_device *adev = ring->adev; |
119 | 104 | ||
120 | /* we are protected by the ring emission mutex */ | 105 | /* we are protected by the ring emission mutex */ |
121 | *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); | 106 | *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); |
122 | if ((*fence) == NULL) { | 107 | if ((*fence) == NULL) { |
123 | return -ENOMEM; | 108 | return -ENOMEM; |
124 | } | 109 | } |
@@ -132,11 +117,23 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
132 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, | 117 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, |
133 | (*fence)->seq, | 118 | (*fence)->seq, |
134 | AMDGPU_FENCE_FLAG_INT); | 119 | AMDGPU_FENCE_FLAG_INT); |
135 | trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq); | ||
136 | return 0; | 120 | return 0; |
137 | } | 121 | } |
138 | 122 | ||
139 | /** | 123 | /** |
124 | * amdgpu_fence_schedule_fallback - schedule fallback check | ||
125 | * | ||
126 | * @ring: pointer to struct amdgpu_ring | ||
127 | * | ||
128 | * Start a timer as fallback to our interrupts. | ||
129 | */ | ||
130 | static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) | ||
131 | { | ||
132 | mod_timer(&ring->fence_drv.fallback_timer, | ||
133 | jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT); | ||
134 | } | ||
135 | |||
136 | /** | ||
140 | * amdgpu_fence_activity - check for fence activity | 137 | * amdgpu_fence_activity - check for fence activity |
141 | * | 138 | * |
142 | * @ring: pointer to struct amdgpu_ring | 139 | * @ring: pointer to struct amdgpu_ring |
@@ -202,45 +199,38 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
202 | } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq); | 199 | } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq); |
203 | 200 | ||
204 | if (seq < last_emitted) | 201 | if (seq < last_emitted) |
205 | amdgpu_fence_schedule_check(ring); | 202 | amdgpu_fence_schedule_fallback(ring); |
206 | 203 | ||
207 | return wake; | 204 | return wake; |
208 | } | 205 | } |
209 | 206 | ||
210 | /** | 207 | /** |
211 | * amdgpu_fence_check_lockup - check for hardware lockup | 208 | * amdgpu_fence_process - process a fence |
212 | * | 209 | * |
213 | * @work: delayed work item | 210 | * @adev: amdgpu_device pointer |
211 | * @ring: ring index the fence is associated with | ||
214 | * | 212 | * |
215 | * Checks for fence activity and if there is none probe | 213 | * Checks the current fence value and wakes the fence queue |
216 | * the hardware if a lockup occured. | 214 | * if the sequence number has increased (all asics). |
217 | */ | 215 | */ |
218 | static void amdgpu_fence_check_lockup(struct work_struct *work) | 216 | void amdgpu_fence_process(struct amdgpu_ring *ring) |
219 | { | 217 | { |
220 | struct amdgpu_fence_driver *fence_drv; | ||
221 | struct amdgpu_ring *ring; | ||
222 | |||
223 | fence_drv = container_of(work, struct amdgpu_fence_driver, | ||
224 | lockup_work.work); | ||
225 | ring = fence_drv->ring; | ||
226 | |||
227 | if (amdgpu_fence_activity(ring)) | 218 | if (amdgpu_fence_activity(ring)) |
228 | wake_up_all(&ring->fence_drv.fence_queue); | 219 | wake_up_all(&ring->fence_drv.fence_queue); |
229 | } | 220 | } |
230 | 221 | ||
231 | /** | 222 | /** |
232 | * amdgpu_fence_process - process a fence | 223 | * amdgpu_fence_fallback - fallback for hardware interrupts |
233 | * | 224 | * |
234 | * @adev: amdgpu_device pointer | 225 | * @work: delayed work item |
235 | * @ring: ring index the fence is associated with | ||
236 | * | 226 | * |
237 | * Checks the current fence value and wakes the fence queue | 227 | * Checks for fence activity. |
238 | * if the sequence number has increased (all asics). | ||
239 | */ | 228 | */ |
240 | void amdgpu_fence_process(struct amdgpu_ring *ring) | 229 | static void amdgpu_fence_fallback(unsigned long arg) |
241 | { | 230 | { |
242 | if (amdgpu_fence_activity(ring)) | 231 | struct amdgpu_ring *ring = (void *)arg; |
243 | wake_up_all(&ring->fence_drv.fence_queue); | 232 | |
233 | amdgpu_fence_process(ring); | ||
244 | } | 234 | } |
245 | 235 | ||
246 | /** | 236 | /** |
@@ -290,7 +280,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
290 | if (atomic64_read(&ring->fence_drv.last_seq) >= seq) | 280 | if (atomic64_read(&ring->fence_drv.last_seq) >= seq) |
291 | return 0; | 281 | return 0; |
292 | 282 | ||
293 | amdgpu_fence_schedule_check(ring); | 283 | amdgpu_fence_schedule_fallback(ring); |
294 | wait_event(ring->fence_drv.fence_queue, ( | 284 | wait_event(ring->fence_drv.fence_queue, ( |
295 | (signaled = amdgpu_fence_seq_signaled(ring, seq)))); | 285 | (signaled = amdgpu_fence_seq_signaled(ring, seq)))); |
296 | 286 | ||
@@ -491,9 +481,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
491 | atomic64_set(&ring->fence_drv.last_seq, 0); | 481 | atomic64_set(&ring->fence_drv.last_seq, 0); |
492 | ring->fence_drv.initialized = false; | 482 | ring->fence_drv.initialized = false; |
493 | 483 | ||
494 | INIT_DELAYED_WORK(&ring->fence_drv.lockup_work, | 484 | setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, |
495 | amdgpu_fence_check_lockup); | 485 | (unsigned long)ring); |
496 | ring->fence_drv.ring = ring; | ||
497 | 486 | ||
498 | init_waitqueue_head(&ring->fence_drv.fence_queue); | 487 | init_waitqueue_head(&ring->fence_drv.fence_queue); |
499 | 488 | ||
@@ -536,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
536 | */ | 525 | */ |
537 | int amdgpu_fence_driver_init(struct amdgpu_device *adev) | 526 | int amdgpu_fence_driver_init(struct amdgpu_device *adev) |
538 | { | 527 | { |
528 | if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) { | ||
529 | amdgpu_fence_slab = kmem_cache_create( | ||
530 | "amdgpu_fence", sizeof(struct amdgpu_fence), 0, | ||
531 | SLAB_HWCACHE_ALIGN, NULL); | ||
532 | if (!amdgpu_fence_slab) | ||
533 | return -ENOMEM; | ||
534 | } | ||
539 | if (amdgpu_debugfs_fence_init(adev)) | 535 | if (amdgpu_debugfs_fence_init(adev)) |
540 | dev_err(adev->dev, "fence debugfs file creation failed\n"); | 536 | dev_err(adev->dev, "fence debugfs file creation failed\n"); |
541 | 537 | ||
@@ -554,9 +550,12 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
554 | { | 550 | { |
555 | int i, r; | 551 | int i, r; |
556 | 552 | ||
553 | if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) | ||
554 | kmem_cache_destroy(amdgpu_fence_slab); | ||
557 | mutex_lock(&adev->ring_lock); | 555 | mutex_lock(&adev->ring_lock); |
558 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | 556 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { |
559 | struct amdgpu_ring *ring = adev->rings[i]; | 557 | struct amdgpu_ring *ring = adev->rings[i]; |
558 | |||
560 | if (!ring || !ring->fence_drv.initialized) | 559 | if (!ring || !ring->fence_drv.initialized) |
561 | continue; | 560 | continue; |
562 | r = amdgpu_fence_wait_empty(ring); | 561 | r = amdgpu_fence_wait_empty(ring); |
@@ -568,6 +567,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
568 | amdgpu_irq_put(adev, ring->fence_drv.irq_src, | 567 | amdgpu_irq_put(adev, ring->fence_drv.irq_src, |
569 | ring->fence_drv.irq_type); | 568 | ring->fence_drv.irq_type); |
570 | amd_sched_fini(&ring->sched); | 569 | amd_sched_fini(&ring->sched); |
570 | del_timer_sync(&ring->fence_drv.fallback_timer); | ||
571 | ring->fence_drv.initialized = false; | 571 | ring->fence_drv.initialized = false; |
572 | } | 572 | } |
573 | mutex_unlock(&adev->ring_lock); | 573 | mutex_unlock(&adev->ring_lock); |
@@ -751,18 +751,25 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
751 | fence->fence_wake.func = amdgpu_fence_check_signaled; | 751 | fence->fence_wake.func = amdgpu_fence_check_signaled; |
752 | __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake); | 752 | __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake); |
753 | fence_get(f); | 753 | fence_get(f); |
754 | amdgpu_fence_schedule_check(ring); | 754 | if (!timer_pending(&ring->fence_drv.fallback_timer)) |
755 | amdgpu_fence_schedule_fallback(ring); | ||
755 | FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); | 756 | FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); |
756 | return true; | 757 | return true; |
757 | } | 758 | } |
758 | 759 | ||
760 | static void amdgpu_fence_release(struct fence *f) | ||
761 | { | ||
762 | struct amdgpu_fence *fence = to_amdgpu_fence(f); | ||
763 | kmem_cache_free(amdgpu_fence_slab, fence); | ||
764 | } | ||
765 | |||
759 | const struct fence_ops amdgpu_fence_ops = { | 766 | const struct fence_ops amdgpu_fence_ops = { |
760 | .get_driver_name = amdgpu_fence_get_driver_name, | 767 | .get_driver_name = amdgpu_fence_get_driver_name, |
761 | .get_timeline_name = amdgpu_fence_get_timeline_name, | 768 | .get_timeline_name = amdgpu_fence_get_timeline_name, |
762 | .enable_signaling = amdgpu_fence_enable_signaling, | 769 | .enable_signaling = amdgpu_fence_enable_signaling, |
763 | .signaled = amdgpu_fence_is_signaled, | 770 | .signaled = amdgpu_fence_is_signaled, |
764 | .wait = fence_default_wait, | 771 | .wait = fence_default_wait, |
765 | .release = NULL, | 772 | .release = amdgpu_fence_release, |
766 | }; | 773 | }; |
767 | 774 | ||
768 | /* | 775 | /* |
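For context on the two mechanisms amdgpu_fence.c switches to, the sketch below shows the same pattern in isolation: a shared kmem_cache for fence objects and a plain kernel timer that is armed only as a fallback for missed fence interrupts. This is an illustrative sketch, not code from the patch; everything prefixed with example_ is hypothetical.

    static struct kmem_cache *example_fence_slab;

    /* timer callback: do the same bookkeeping the fence interrupt would do */
    static void example_fence_fallback(unsigned long arg)
    {
            struct amdgpu_ring *ring = (void *)arg;

            amdgpu_fence_process(ring);
    }

    static int example_fence_init(struct amdgpu_ring *ring)
    {
            /* one slab shared by all fences, created once per driver instance */
            example_fence_slab = kmem_cache_create("example_fence",
                                                   sizeof(struct amdgpu_fence), 0,
                                                   SLAB_HWCACHE_ALIGN, NULL);
            if (!example_fence_slab)
                    return -ENOMEM;

            /* armed later with mod_timer(), stopped with del_timer_sync() */
            setup_timer(&ring->fence_drv.fallback_timer,
                        example_fence_fallback, (unsigned long)ring);
            return 0;
    }

Freed fences go back to the slab via the new .release callback, which is why the fence_ops table above gains amdgpu_fence_release instead of the previous NULL.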
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 087332858853..00c5b580f56c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -483,6 +483,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
483 | if (domain == AMDGPU_GEM_DOMAIN_CPU) | 483 | if (domain == AMDGPU_GEM_DOMAIN_CPU) |
484 | goto error_unreserve; | 484 | goto error_unreserve; |
485 | } | 485 | } |
486 | r = amdgpu_vm_update_page_directory(adev, bo_va->vm); | ||
487 | if (r) | ||
488 | goto error_unreserve; | ||
486 | 489 | ||
487 | r = amdgpu_vm_clear_freed(adev, bo_va->vm); | 490 | r = amdgpu_vm_clear_freed(adev, bo_va->vm); |
488 | if (r) | 491 | if (r) |
@@ -512,6 +515,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
512 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | 515 | struct amdgpu_fpriv *fpriv = filp->driver_priv; |
513 | struct amdgpu_bo *rbo; | 516 | struct amdgpu_bo *rbo; |
514 | struct amdgpu_bo_va *bo_va; | 517 | struct amdgpu_bo_va *bo_va; |
518 | struct ttm_validate_buffer tv, tv_pd; | ||
519 | struct ww_acquire_ctx ticket; | ||
520 | struct list_head list, duplicates; | ||
515 | uint32_t invalid_flags, va_flags = 0; | 521 | uint32_t invalid_flags, va_flags = 0; |
516 | int r = 0; | 522 | int r = 0; |
517 | 523 | ||
@@ -549,7 +555,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
549 | return -ENOENT; | 555 | return -ENOENT; |
550 | mutex_lock(&fpriv->vm.mutex); | 556 | mutex_lock(&fpriv->vm.mutex); |
551 | rbo = gem_to_amdgpu_bo(gobj); | 557 | rbo = gem_to_amdgpu_bo(gobj); |
552 | r = amdgpu_bo_reserve(rbo, false); | 558 | INIT_LIST_HEAD(&list); |
559 | INIT_LIST_HEAD(&duplicates); | ||
560 | tv.bo = &rbo->tbo; | ||
561 | tv.shared = true; | ||
562 | list_add(&tv.head, &list); | ||
563 | |||
564 | if (args->operation == AMDGPU_VA_OP_MAP) { | ||
565 | tv_pd.bo = &fpriv->vm.page_directory->tbo; | ||
566 | tv_pd.shared = true; | ||
567 | list_add(&tv_pd.head, &list); | ||
568 | } | ||
569 | r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); | ||
553 | if (r) { | 570 | if (r) { |
554 | mutex_unlock(&fpriv->vm.mutex); | 571 | mutex_unlock(&fpriv->vm.mutex); |
555 | drm_gem_object_unreference_unlocked(gobj); | 572 | drm_gem_object_unreference_unlocked(gobj); |
@@ -558,7 +575,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
558 | 575 | ||
559 | bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo); | 576 | bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo); |
560 | if (!bo_va) { | 577 | if (!bo_va) { |
561 | amdgpu_bo_unreserve(rbo); | 578 | ttm_eu_backoff_reservation(&ticket, &list); |
579 | drm_gem_object_unreference_unlocked(gobj); | ||
562 | mutex_unlock(&fpriv->vm.mutex); | 580 | mutex_unlock(&fpriv->vm.mutex); |
563 | return -ENOENT; | 581 | return -ENOENT; |
564 | } | 582 | } |
@@ -581,7 +599,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
581 | default: | 599 | default: |
582 | break; | 600 | break; |
583 | } | 601 | } |
584 | 602 | ttm_eu_backoff_reservation(&ticket, &list); | |
585 | if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) | 603 | if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) |
586 | amdgpu_gem_va_update_vm(adev, bo_va, args->operation); | 604 | amdgpu_gem_va_update_vm(adev, bo_va, args->operation); |
587 | mutex_unlock(&fpriv->vm.mutex); | 605 | mutex_unlock(&fpriv->vm.mutex); |
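The amdgpu_gem_va_ioctl() change above replaces the single amdgpu_bo_reserve() call with a TTM execbuf-style validation list, so the GEM BO and the VM page directory are reserved under one ww_acquire ticket before amdgpu_vm_update_page_directory() runs. A condensed, hypothetical sketch of that pattern (variable names follow the diff, surrounding code omitted):

    struct ttm_validate_buffer tv, tv_pd;
    struct ww_acquire_ctx ticket;
    struct list_head list, duplicates;
    int r;

    INIT_LIST_HEAD(&list);
    INIT_LIST_HEAD(&duplicates);

    tv.bo = &rbo->tbo;                              /* the BO being mapped/unmapped */
    tv.shared = true;
    list_add(&tv.head, &list);

    tv_pd.bo = &fpriv->vm.page_directory->tbo;      /* page directory joins the list */
    tv_pd.shared = true;
    list_add(&tv_pd.head, &list);

    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
    if (r)
            return r;

    /* ... look up the bo_va and apply the map/unmap operation ... */

    ttm_eu_backoff_reservation(&ticket, &list);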
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index e65987743871..9e25edafa721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
62 | int r; | 62 | int r; |
63 | 63 | ||
64 | if (size) { | 64 | if (size) { |
65 | r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, | 65 | r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, |
66 | &ib->sa_bo, size, 256); | 66 | &ib->sa_bo, size, 256); |
67 | if (r) { | 67 | if (r) { |
68 | dev_err(adev->dev, "failed to get a new IB (%d)\n", r); | 68 | dev_err(adev->dev, "failed to get a new IB (%d)\n", r); |
@@ -216,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
216 | } | 216 | } |
217 | 217 | ||
218 | if (ib->vm) | 218 | if (ib->vm) |
219 | amdgpu_vm_fence(adev, ib->vm, ib->fence); | 219 | amdgpu_vm_fence(adev, ib->vm, &ib->fence->base); |
220 | 220 | ||
221 | amdgpu_ring_unlock_commit(ring); | 221 | amdgpu_ring_unlock_commit(ring); |
222 | return 0; | 222 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3c2ff4567798..ea756e77b023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
189 | struct amdgpu_sa_manager *sa_manager); | 189 | struct amdgpu_sa_manager *sa_manager); |
190 | int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, | 190 | int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, |
191 | struct amdgpu_sa_manager *sa_manager); | 191 | struct amdgpu_sa_manager *sa_manager); |
192 | int amdgpu_sa_bo_new(struct amdgpu_device *adev, | 192 | int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, |
193 | struct amdgpu_sa_manager *sa_manager, | 193 | struct amdgpu_sa_bo **sa_bo, |
194 | struct amdgpu_sa_bo **sa_bo, | 194 | unsigned size, unsigned align); |
195 | unsigned size, unsigned align); | ||
196 | void amdgpu_sa_bo_free(struct amdgpu_device *adev, | 195 | void amdgpu_sa_bo_free(struct amdgpu_device *adev, |
197 | struct amdgpu_sa_bo **sa_bo, | 196 | struct amdgpu_sa_bo **sa_bo, |
198 | struct fence *fence); | 197 | struct fence *fence); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 0212b31dc194..8b88edb0434b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -311,8 +311,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
311 | return false; | 311 | return false; |
312 | } | 312 | } |
313 | 313 | ||
314 | int amdgpu_sa_bo_new(struct amdgpu_device *adev, | 314 | int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, |
315 | struct amdgpu_sa_manager *sa_manager, | ||
316 | struct amdgpu_sa_bo **sa_bo, | 315 | struct amdgpu_sa_bo **sa_bo, |
317 | unsigned size, unsigned align) | 316 | unsigned size, unsigned align) |
318 | { | 317 | { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index dcf4a8aca680..438c05254695 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -26,6 +26,7 @@
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <drm/drmP.h> | 27 | #include <drm/drmP.h> |
28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
29 | #include "amdgpu_trace.h" | ||
29 | 30 | ||
30 | static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) | 31 | static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) |
31 | { | 32 | { |
@@ -44,11 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
44 | return NULL; | 45 | return NULL; |
45 | } | 46 | } |
46 | job = to_amdgpu_job(sched_job); | 47 | job = to_amdgpu_job(sched_job); |
47 | mutex_lock(&job->job_lock); | 48 | trace_amdgpu_sched_run_job(job); |
48 | r = amdgpu_ib_schedule(job->adev, | 49 | r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner); |
49 | job->num_ibs, | ||
50 | job->ibs, | ||
51 | job->base.owner); | ||
52 | if (r) { | 50 | if (r) { |
53 | DRM_ERROR("Error scheduling IBs (%d)\n", r); | 51 | DRM_ERROR("Error scheduling IBs (%d)\n", r); |
54 | goto err; | 52 | goto err; |
@@ -61,8 +59,6 @@ err:
61 | if (job->free_job) | 59 | if (job->free_job) |
62 | job->free_job(job); | 60 | job->free_job(job); |
63 | 61 | ||
64 | mutex_unlock(&job->job_lock); | ||
65 | fence_put(&job->base.s_fence->base); | ||
66 | kfree(job); | 62 | kfree(job); |
67 | return fence ? &fence->base : NULL; | 63 | return fence ? &fence->base : NULL; |
68 | } | 64 | } |
@@ -88,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
88 | return -ENOMEM; | 84 | return -ENOMEM; |
89 | job->base.sched = &ring->sched; | 85 | job->base.sched = &ring->sched; |
90 | job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; | 86 | job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; |
87 | job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); | ||
88 | if (!job->base.s_fence) { | ||
89 | kfree(job); | ||
90 | return -ENOMEM; | ||
91 | } | ||
92 | *f = fence_get(&job->base.s_fence->base); | ||
93 | |||
91 | job->adev = adev; | 94 | job->adev = adev; |
92 | job->ibs = ibs; | 95 | job->ibs = ibs; |
93 | job->num_ibs = num_ibs; | 96 | job->num_ibs = num_ibs; |
94 | job->base.owner = owner; | 97 | job->owner = owner; |
95 | mutex_init(&job->job_lock); | ||
96 | job->free_job = free_job; | 98 | job->free_job = free_job; |
97 | mutex_lock(&job->job_lock); | 99 | amd_sched_entity_push_job(&job->base); |
98 | r = amd_sched_entity_push_job(&job->base); | ||
99 | if (r) { | ||
100 | mutex_unlock(&job->job_lock); | ||
101 | kfree(job); | ||
102 | return r; | ||
103 | } | ||
104 | *f = fence_get(&job->base.s_fence->base); | ||
105 | mutex_unlock(&job->job_lock); | ||
106 | } else { | 100 | } else { |
107 | r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); | 101 | r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); |
108 | if (r) | 102 | if (r) |
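Both scheduler submission paths (the CS ioctl above and this kernel helper) now follow the same ordering: the scheduler fence is created and an extra reference is taken for the caller before the job is pushed, because a pushed job may run and be freed immediately, which is also why job_lock disappears. A minimal sketch of that ordering, assuming the same structures as the diff (not verbatim patch code):

    job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
    if (!job->base.s_fence) {
            kfree(job);
            return -ENOMEM;
    }
    /* take the caller's reference while the job is still ours */
    *f = fence_get(&job->base.s_fence->base);

    /* after this call the job may complete and be freed at any time */
    amd_sched_entity_push_job(&job->base);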
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index ff3ca52ec6fe..1caaf201b708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev,
40 | if (*semaphore == NULL) { | 40 | if (*semaphore == NULL) { |
41 | return -ENOMEM; | 41 | return -ENOMEM; |
42 | } | 42 | } |
43 | r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, | 43 | r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, |
44 | &(*semaphore)->sa_bo, 8, 8); | 44 | &(*semaphore)->sa_bo, 8, 8); |
45 | if (r) { | 45 | if (r) { |
46 | kfree(*semaphore); | 46 | kfree(*semaphore); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a6697fd05217..dd005c336c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -302,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
302 | return -EINVAL; | 302 | return -EINVAL; |
303 | } | 303 | } |
304 | 304 | ||
305 | if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores || | 305 | if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) { |
306 | (count >= AMDGPU_NUM_SYNCS)) { | 306 | r = fence_wait(&fence->base, true); |
307 | if (r) | ||
308 | return r; | ||
309 | continue; | ||
310 | } | ||
311 | |||
312 | if (count >= AMDGPU_NUM_SYNCS) { | ||
307 | /* not enough room, wait manually */ | 313 | /* not enough room, wait manually */ |
308 | r = fence_wait(&fence->base, false); | 314 | r = fence_wait(&fence->base, false); |
309 | if (r) | 315 | if (r) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 76ecbaf72a2e..8f9834ab1bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
48 | __entry->fences) | 48 | __entry->fences) |
49 | ); | 49 | ); |
50 | 50 | ||
51 | TRACE_EVENT(amdgpu_cs_ioctl, | ||
52 | TP_PROTO(struct amdgpu_job *job), | ||
53 | TP_ARGS(job), | ||
54 | TP_STRUCT__entry( | ||
55 | __field(struct amdgpu_device *, adev) | ||
56 | __field(struct amd_sched_job *, sched_job) | ||
57 | __field(struct amdgpu_ib *, ib) | ||
58 | __field(struct fence *, fence) | ||
59 | __field(char *, ring_name) | ||
60 | __field(u32, num_ibs) | ||
61 | ), | ||
62 | |||
63 | TP_fast_assign( | ||
64 | __entry->adev = job->adev; | ||
65 | __entry->sched_job = &job->base; | ||
66 | __entry->ib = job->ibs; | ||
67 | __entry->fence = &job->base.s_fence->base; | ||
68 | __entry->ring_name = job->ibs[0].ring->name; | ||
69 | __entry->num_ibs = job->num_ibs; | ||
70 | ), | ||
71 | TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", | ||
72 | __entry->adev, __entry->sched_job, __entry->ib, | ||
73 | __entry->fence, __entry->ring_name, __entry->num_ibs) | ||
74 | ); | ||
75 | |||
76 | TRACE_EVENT(amdgpu_sched_run_job, | ||
77 | TP_PROTO(struct amdgpu_job *job), | ||
78 | TP_ARGS(job), | ||
79 | TP_STRUCT__entry( | ||
80 | __field(struct amdgpu_device *, adev) | ||
81 | __field(struct amd_sched_job *, sched_job) | ||
82 | __field(struct amdgpu_ib *, ib) | ||
83 | __field(struct fence *, fence) | ||
84 | __field(char *, ring_name) | ||
85 | __field(u32, num_ibs) | ||
86 | ), | ||
87 | |||
88 | TP_fast_assign( | ||
89 | __entry->adev = job->adev; | ||
90 | __entry->sched_job = &job->base; | ||
91 | __entry->ib = job->ibs; | ||
92 | __entry->fence = &job->base.s_fence->base; | ||
93 | __entry->ring_name = job->ibs[0].ring->name; | ||
94 | __entry->num_ibs = job->num_ibs; | ||
95 | ), | ||
96 | TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", | ||
97 | __entry->adev, __entry->sched_job, __entry->ib, | ||
98 | __entry->fence, __entry->ring_name, __entry->num_ibs) | ||
99 | ); | ||
100 | |||
101 | |||
51 | TRACE_EVENT(amdgpu_vm_grab_id, | 102 | TRACE_EVENT(amdgpu_vm_grab_id, |
52 | TP_PROTO(unsigned vmid, int ring), | 103 | TP_PROTO(unsigned vmid, int ring), |
53 | TP_ARGS(vmid, ring), | 104 | TP_ARGS(vmid, ring), |
@@ -196,49 +247,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
196 | TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) | 247 | TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) |
197 | ); | 248 | ); |
198 | 249 | ||
199 | DECLARE_EVENT_CLASS(amdgpu_fence_request, | ||
200 | |||
201 | TP_PROTO(struct drm_device *dev, int ring, u32 seqno), | ||
202 | |||
203 | TP_ARGS(dev, ring, seqno), | ||
204 | |||
205 | TP_STRUCT__entry( | ||
206 | __field(u32, dev) | ||
207 | __field(int, ring) | ||
208 | __field(u32, seqno) | ||
209 | ), | ||
210 | |||
211 | TP_fast_assign( | ||
212 | __entry->dev = dev->primary->index; | ||
213 | __entry->ring = ring; | ||
214 | __entry->seqno = seqno; | ||
215 | ), | ||
216 | |||
217 | TP_printk("dev=%u, ring=%d, seqno=%u", | ||
218 | __entry->dev, __entry->ring, __entry->seqno) | ||
219 | ); | ||
220 | |||
221 | DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit, | ||
222 | |||
223 | TP_PROTO(struct drm_device *dev, int ring, u32 seqno), | ||
224 | |||
225 | TP_ARGS(dev, ring, seqno) | ||
226 | ); | ||
227 | |||
228 | DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin, | ||
229 | |||
230 | TP_PROTO(struct drm_device *dev, int ring, u32 seqno), | ||
231 | |||
232 | TP_ARGS(dev, ring, seqno) | ||
233 | ); | ||
234 | |||
235 | DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end, | ||
236 | |||
237 | TP_PROTO(struct drm_device *dev, int ring, u32 seqno), | ||
238 | |||
239 | TP_ARGS(dev, ring, seqno) | ||
240 | ); | ||
241 | |||
242 | DECLARE_EVENT_CLASS(amdgpu_semaphore_request, | 250 | DECLARE_EVENT_CLASS(amdgpu_semaphore_request, |
243 | 251 | ||
244 | TP_PROTO(int ring, struct amdgpu_semaphore *sem), | 252 | TP_PROTO(int ring, struct amdgpu_semaphore *sem), |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 81bb8e9fc26d..d4bac5f49939 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1073,10 +1073,10 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
1073 | ret = drm_mm_dump_table(m, mm); | 1073 | ret = drm_mm_dump_table(m, mm); |
1074 | spin_unlock(&glob->lru_lock); | 1074 | spin_unlock(&glob->lru_lock); |
1075 | if (ttm_pl == TTM_PL_VRAM) | 1075 | if (ttm_pl == TTM_PL_VRAM) |
1076 | seq_printf(m, "man size:%llu pages, ram usage:%luMB, vis usage:%luMB\n", | 1076 | seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", |
1077 | adev->mman.bdev.man[ttm_pl].size, | 1077 | adev->mman.bdev.man[ttm_pl].size, |
1078 | atomic64_read(&adev->vram_usage) >> 20, | 1078 | (u64)atomic64_read(&adev->vram_usage) >> 20, |
1079 | atomic64_read(&adev->vram_vis_usage) >> 20); | 1079 | (u64)atomic64_read(&adev->vram_vis_usage) >> 20); |
1080 | return ret; | 1080 | return ret; |
1081 | } | 1081 | } |
1082 | 1082 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 633a32a48560..159ce54bbd8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -143,10 +143,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
143 | unsigned i; | 143 | unsigned i; |
144 | 144 | ||
145 | /* check if the id is still valid */ | 145 | /* check if the id is still valid */ |
146 | if (vm_id->id && vm_id->last_id_use && | 146 | if (vm_id->id) { |
147 | vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) { | 147 | unsigned id = vm_id->id; |
148 | trace_amdgpu_vm_grab_id(vm_id->id, ring->idx); | 148 | long owner; |
149 | return 0; | 149 | |
150 | owner = atomic_long_read(&adev->vm_manager.ids[id].owner); | ||
151 | if (owner == (long)vm) { | ||
152 | trace_amdgpu_vm_grab_id(vm_id->id, ring->idx); | ||
153 | return 0; | ||
154 | } | ||
150 | } | 155 | } |
151 | 156 | ||
152 | /* we definately need to flush */ | 157 | /* we definately need to flush */ |
@@ -154,7 +159,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
154 | 159 | ||
155 | /* skip over VMID 0, since it is the system VM */ | 160 | /* skip over VMID 0, since it is the system VM */ |
156 | for (i = 1; i < adev->vm_manager.nvm; ++i) { | 161 | for (i = 1; i < adev->vm_manager.nvm; ++i) { |
157 | struct fence *fence = adev->vm_manager.active[i]; | 162 | struct fence *fence = adev->vm_manager.ids[i].active; |
158 | struct amdgpu_ring *fring; | 163 | struct amdgpu_ring *fring; |
159 | 164 | ||
160 | if (fence == NULL) { | 165 | if (fence == NULL) { |
@@ -176,7 +181,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
176 | if (choices[i]) { | 181 | if (choices[i]) { |
177 | struct fence *fence; | 182 | struct fence *fence; |
178 | 183 | ||
179 | fence = adev->vm_manager.active[choices[i]]; | 184 | fence = adev->vm_manager.ids[choices[i]].active; |
180 | vm_id->id = choices[i]; | 185 | vm_id->id = choices[i]; |
181 | 186 | ||
182 | trace_amdgpu_vm_grab_id(choices[i], ring->idx); | 187 | trace_amdgpu_vm_grab_id(choices[i], ring->idx); |
@@ -207,24 +212,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
207 | uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); | 212 | uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); |
208 | struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; | 213 | struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; |
209 | struct fence *flushed_updates = vm_id->flushed_updates; | 214 | struct fence *flushed_updates = vm_id->flushed_updates; |
210 | bool is_earlier = false; | 215 | bool is_later; |
211 | |||
212 | if (flushed_updates && updates) { | ||
213 | BUG_ON(flushed_updates->context != updates->context); | ||
214 | is_earlier = (updates->seqno - flushed_updates->seqno <= | ||
215 | INT_MAX) ? true : false; | ||
216 | } | ||
217 | 216 | ||
218 | if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || | 217 | if (!flushed_updates) |
219 | is_earlier) { | 218 | is_later = true; |
219 | else if (!updates) | ||
220 | is_later = false; | ||
221 | else | ||
222 | is_later = fence_is_later(updates, flushed_updates); | ||
220 | 223 | ||
224 | if (pd_addr != vm_id->pd_gpu_addr || is_later) { | ||
221 | trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); | 225 | trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); |
222 | if (is_earlier) { | 226 | if (is_later) { |
223 | vm_id->flushed_updates = fence_get(updates); | 227 | vm_id->flushed_updates = fence_get(updates); |
224 | fence_put(flushed_updates); | 228 | fence_put(flushed_updates); |
225 | } | 229 | } |
226 | if (!flushed_updates) | ||
227 | vm_id->flushed_updates = fence_get(updates); | ||
228 | vm_id->pd_gpu_addr = pd_addr; | 230 | vm_id->pd_gpu_addr = pd_addr; |
229 | amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); | 231 | amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); |
230 | } | 232 | } |
@@ -244,16 +246,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
244 | */ | 246 | */ |
245 | void amdgpu_vm_fence(struct amdgpu_device *adev, | 247 | void amdgpu_vm_fence(struct amdgpu_device *adev, |
246 | struct amdgpu_vm *vm, | 248 | struct amdgpu_vm *vm, |
247 | struct amdgpu_fence *fence) | 249 | struct fence *fence) |
248 | { | 250 | { |
249 | unsigned ridx = fence->ring->idx; | 251 | struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence); |
250 | unsigned vm_id = vm->ids[ridx].id; | 252 | unsigned vm_id = vm->ids[ring->idx].id; |
251 | |||
252 | fence_put(adev->vm_manager.active[vm_id]); | ||
253 | adev->vm_manager.active[vm_id] = fence_get(&fence->base); | ||
254 | 253 | ||
255 | fence_put(vm->ids[ridx].last_id_use); | 254 | fence_put(adev->vm_manager.ids[vm_id].active); |
256 | vm->ids[ridx].last_id_use = fence_get(&fence->base); | 255 | adev->vm_manager.ids[vm_id].active = fence_get(fence); |
256 | atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm); | ||
257 | } | 257 | } |
258 | 258 | ||
259 | /** | 259 | /** |
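
Editor's sketch: amdgpu_vm_fence() now takes a plain struct fence *, resolves the ring through amdgpu_ring_from_fence(), and both replaces the per-VMID active fence and tags the VMID with its owning VM. The replace-and-tag step, pulled out for clarity; only the wrapper name is invented, the field accesses match the hunk above.

/* Sketch of the replace-and-tag step performed on a VMID slot. */
static void amdgpu_vm_set_id_active(struct amdgpu_device *adev, unsigned vm_id,
                                    struct fence *fence, struct amdgpu_vm *vm)
{
        fence_put(adev->vm_manager.ids[vm_id].active);          /* drop old ref */
        adev->vm_manager.ids[vm_id].active = fence_get(fence);  /* take new ref */
        atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
}
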
@@ -332,6 +332,8 @@ int amdgpu_vm_free_job(struct amdgpu_job *job) | |||
332 | * | 332 | * |
333 | * @adev: amdgpu_device pointer | 333 | * @adev: amdgpu_device pointer |
334 | * @bo: bo to clear | 334 | * @bo: bo to clear |
335 | * | ||
336 | * need to reserve bo first before calling it. | ||
335 | */ | 337 | */ |
336 | static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | 338 | static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, |
337 | struct amdgpu_bo *bo) | 339 | struct amdgpu_bo *bo) |
@@ -343,24 +345,20 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | |||
343 | uint64_t addr; | 345 | uint64_t addr; |
344 | int r; | 346 | int r; |
345 | 347 | ||
346 | r = amdgpu_bo_reserve(bo, false); | ||
347 | if (r) | ||
348 | return r; | ||
349 | |||
350 | r = reservation_object_reserve_shared(bo->tbo.resv); | 348 | r = reservation_object_reserve_shared(bo->tbo.resv); |
351 | if (r) | 349 | if (r) |
352 | return r; | 350 | return r; |
353 | 351 | ||
354 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); | 352 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); |
355 | if (r) | 353 | if (r) |
356 | goto error_unreserve; | 354 | goto error; |
357 | 355 | ||
358 | addr = amdgpu_bo_gpu_offset(bo); | 356 | addr = amdgpu_bo_gpu_offset(bo); |
359 | entries = amdgpu_bo_size(bo) / 8; | 357 | entries = amdgpu_bo_size(bo) / 8; |
360 | 358 | ||
361 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); | 359 | ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); |
362 | if (!ib) | 360 | if (!ib) |
363 | goto error_unreserve; | 361 | goto error; |
364 | 362 | ||
365 | r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib); | 363 | r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib); |
366 | if (r) | 364 | if (r) |
@@ -378,16 +376,14 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | |||
378 | if (!r) | 376 | if (!r) |
379 | amdgpu_bo_fence(bo, fence, true); | 377 | amdgpu_bo_fence(bo, fence, true); |
380 | fence_put(fence); | 378 | fence_put(fence); |
381 | if (amdgpu_enable_scheduler) { | 379 | if (amdgpu_enable_scheduler) |
382 | amdgpu_bo_unreserve(bo); | ||
383 | return 0; | 380 | return 0; |
384 | } | 381 | |
385 | error_free: | 382 | error_free: |
386 | amdgpu_ib_free(adev, ib); | 383 | amdgpu_ib_free(adev, ib); |
387 | kfree(ib); | 384 | kfree(ib); |
388 | 385 | ||
389 | error_unreserve: | 386 | error: |
390 | amdgpu_bo_unreserve(bo); | ||
391 | return r; | 387 | return r; |
392 | } | 388 | } |
393 | 389 | ||
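
Editor's sketch: with the reservation hoisted out of amdgpu_vm_clear_bo(), its contract changes: the BO must already be reserved by the caller (see the new kerneldoc note above) and is unreserved by the caller afterwards. A hedged sketch of the expected caller pattern, matching what amdgpu_vm_init() does later in this patch; the wrapper name is illustrative.

/* Sketch: caller-side pattern for the new "reserve before calling" contract
 * of amdgpu_vm_clear_bo(); wrapper name is illustrative only. */
static int amdgpu_vm_clear_bo_reserved(struct amdgpu_device *adev,
                                       struct amdgpu_bo *bo)
{
        int r;

        r = amdgpu_bo_reserve(bo, false);
        if (r)
                return r;

        r = amdgpu_vm_clear_bo(adev, bo);       /* BO is reserved here */
        amdgpu_bo_unreserve(bo);
        return r;
}
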
@@ -989,7 +985,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | |||
989 | * Add a mapping of the BO at the specified addr into the VM. | 985 | * Add a mapping of the BO at the specified addr into the VM. |
990 | * Returns 0 for success, error for failure. | 986 | * Returns 0 for success, error for failure. |
991 | * | 987 | * |
992 | * Object has to be reserved and gets unreserved by this function! | 988 | * Object has to be reserved and unreserved outside! |
993 | */ | 989 | */ |
994 | int amdgpu_vm_bo_map(struct amdgpu_device *adev, | 990 | int amdgpu_vm_bo_map(struct amdgpu_device *adev, |
995 | struct amdgpu_bo_va *bo_va, | 991 | struct amdgpu_bo_va *bo_va, |
@@ -1005,30 +1001,27 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1005 | 1001 | ||
1006 | /* validate the parameters */ | 1002 | /* validate the parameters */ |
1007 | if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || | 1003 | if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || |
1008 | size == 0 || size & AMDGPU_GPU_PAGE_MASK) { | 1004 | size == 0 || size & AMDGPU_GPU_PAGE_MASK) |
1009 | amdgpu_bo_unreserve(bo_va->bo); | ||
1010 | return -EINVAL; | 1005 | return -EINVAL; |
1011 | } | ||
1012 | 1006 | ||
1013 | /* make sure object fit at this offset */ | 1007 | /* make sure object fit at this offset */ |
1014 | eaddr = saddr + size; | 1008 | eaddr = saddr + size; |
1015 | if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) { | 1009 | if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) |
1016 | amdgpu_bo_unreserve(bo_va->bo); | ||
1017 | return -EINVAL; | 1010 | return -EINVAL; |
1018 | } | ||
1019 | 1011 | ||
1020 | last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; | 1012 | last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; |
1021 | if (last_pfn > adev->vm_manager.max_pfn) { | 1013 | if (last_pfn > adev->vm_manager.max_pfn) { |
1022 | dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n", | 1014 | dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n", |
1023 | last_pfn, adev->vm_manager.max_pfn); | 1015 | last_pfn, adev->vm_manager.max_pfn); |
1024 | amdgpu_bo_unreserve(bo_va->bo); | ||
1025 | return -EINVAL; | 1016 | return -EINVAL; |
1026 | } | 1017 | } |
1027 | 1018 | ||
1028 | saddr /= AMDGPU_GPU_PAGE_SIZE; | 1019 | saddr /= AMDGPU_GPU_PAGE_SIZE; |
1029 | eaddr /= AMDGPU_GPU_PAGE_SIZE; | 1020 | eaddr /= AMDGPU_GPU_PAGE_SIZE; |
1030 | 1021 | ||
1022 | spin_lock(&vm->it_lock); | ||
1031 | it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1); | 1023 | it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1); |
1024 | spin_unlock(&vm->it_lock); | ||
1032 | if (it) { | 1025 | if (it) { |
1033 | struct amdgpu_bo_va_mapping *tmp; | 1026 | struct amdgpu_bo_va_mapping *tmp; |
1034 | tmp = container_of(it, struct amdgpu_bo_va_mapping, it); | 1027 | tmp = container_of(it, struct amdgpu_bo_va_mapping, it); |
@@ -1036,14 +1029,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1036 | dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " | 1029 | dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " |
1037 | "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr, | 1030 | "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr, |
1038 | tmp->it.start, tmp->it.last + 1); | 1031 | tmp->it.start, tmp->it.last + 1); |
1039 | amdgpu_bo_unreserve(bo_va->bo); | ||
1040 | r = -EINVAL; | 1032 | r = -EINVAL; |
1041 | goto error; | 1033 | goto error; |
1042 | } | 1034 | } |
1043 | 1035 | ||
1044 | mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); | 1036 | mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); |
1045 | if (!mapping) { | 1037 | if (!mapping) { |
1046 | amdgpu_bo_unreserve(bo_va->bo); | ||
1047 | r = -ENOMEM; | 1038 | r = -ENOMEM; |
1048 | goto error; | 1039 | goto error; |
1049 | } | 1040 | } |
@@ -1055,7 +1046,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1055 | mapping->flags = flags; | 1046 | mapping->flags = flags; |
1056 | 1047 | ||
1057 | list_add(&mapping->list, &bo_va->invalids); | 1048 | list_add(&mapping->list, &bo_va->invalids); |
1049 | spin_lock(&vm->it_lock); | ||
1058 | interval_tree_insert(&mapping->it, &vm->va); | 1050 | interval_tree_insert(&mapping->it, &vm->va); |
1051 | spin_unlock(&vm->it_lock); | ||
1059 | trace_amdgpu_vm_bo_map(bo_va, mapping); | 1052 | trace_amdgpu_vm_bo_map(bo_va, mapping); |
1060 | 1053 | ||
1061 | /* Make sure the page tables are allocated */ | 1054 | /* Make sure the page tables are allocated */ |
@@ -1067,8 +1060,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1067 | if (eaddr > vm->max_pde_used) | 1060 | if (eaddr > vm->max_pde_used) |
1068 | vm->max_pde_used = eaddr; | 1061 | vm->max_pde_used = eaddr; |
1069 | 1062 | ||
1070 | amdgpu_bo_unreserve(bo_va->bo); | ||
1071 | |||
1072 | /* walk over the address space and allocate the page tables */ | 1063 | /* walk over the address space and allocate the page tables */ |
1073 | for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { | 1064 | for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { |
1074 | struct reservation_object *resv = vm->page_directory->tbo.resv; | 1065 | struct reservation_object *resv = vm->page_directory->tbo.resv; |
@@ -1077,13 +1068,11 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1077 | if (vm->page_tables[pt_idx].bo) | 1068 | if (vm->page_tables[pt_idx].bo) |
1078 | continue; | 1069 | continue; |
1079 | 1070 | ||
1080 | ww_mutex_lock(&resv->lock, NULL); | ||
1081 | r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, | 1071 | r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, |
1082 | AMDGPU_GPU_PAGE_SIZE, true, | 1072 | AMDGPU_GPU_PAGE_SIZE, true, |
1083 | AMDGPU_GEM_DOMAIN_VRAM, | 1073 | AMDGPU_GEM_DOMAIN_VRAM, |
1084 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS, | 1074 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS, |
1085 | NULL, resv, &pt); | 1075 | NULL, resv, &pt); |
1086 | ww_mutex_unlock(&resv->lock); | ||
1087 | if (r) | 1076 | if (r) |
1088 | goto error_free; | 1077 | goto error_free; |
1089 | 1078 | ||
@@ -1101,7 +1090,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1101 | 1090 | ||
1102 | error_free: | 1091 | error_free: |
1103 | list_del(&mapping->list); | 1092 | list_del(&mapping->list); |
1093 | spin_lock(&vm->it_lock); | ||
1104 | interval_tree_remove(&mapping->it, &vm->va); | 1094 | interval_tree_remove(&mapping->it, &vm->va); |
1095 | spin_unlock(&vm->it_lock); | ||
1105 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | 1096 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); |
1106 | kfree(mapping); | 1097 | kfree(mapping); |
1107 | 1098 | ||
@@ -1119,7 +1110,7 @@ error: | |||
1119 | * Remove a mapping of the BO at the specified addr from the VM. | 1110 | * Remove a mapping of the BO at the specified addr from the VM. |
1120 | * Returns 0 for success, error for failure. | 1111 | * Returns 0 for success, error for failure. |
1121 | * | 1112 | * |
1122 | * Object has to be reserved and gets unreserved by this function! | 1113 | * Object has to be reserved and unreserved outside! |
1123 | */ | 1114 | */ |
1124 | int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | 1115 | int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, |
1125 | struct amdgpu_bo_va *bo_va, | 1116 | struct amdgpu_bo_va *bo_va, |
@@ -1144,21 +1135,20 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
1144 | break; | 1135 | break; |
1145 | } | 1136 | } |
1146 | 1137 | ||
1147 | if (&mapping->list == &bo_va->invalids) { | 1138 | if (&mapping->list == &bo_va->invalids) |
1148 | amdgpu_bo_unreserve(bo_va->bo); | ||
1149 | return -ENOENT; | 1139 | return -ENOENT; |
1150 | } | ||
1151 | } | 1140 | } |
1152 | 1141 | ||
1153 | list_del(&mapping->list); | 1142 | list_del(&mapping->list); |
1143 | spin_lock(&vm->it_lock); | ||
1154 | interval_tree_remove(&mapping->it, &vm->va); | 1144 | interval_tree_remove(&mapping->it, &vm->va); |
1145 | spin_unlock(&vm->it_lock); | ||
1155 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | 1146 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); |
1156 | 1147 | ||
1157 | if (valid) | 1148 | if (valid) |
1158 | list_add(&mapping->list, &vm->freed); | 1149 | list_add(&mapping->list, &vm->freed); |
1159 | else | 1150 | else |
1160 | kfree(mapping); | 1151 | kfree(mapping); |
1161 | amdgpu_bo_unreserve(bo_va->bo); | ||
1162 | 1152 | ||
1163 | return 0; | 1153 | return 0; |
1164 | } | 1154 | } |
@@ -1187,13 +1177,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
1187 | 1177 | ||
1188 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { | 1178 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { |
1189 | list_del(&mapping->list); | 1179 | list_del(&mapping->list); |
1180 | spin_lock(&vm->it_lock); | ||
1190 | interval_tree_remove(&mapping->it, &vm->va); | 1181 | interval_tree_remove(&mapping->it, &vm->va); |
1182 | spin_unlock(&vm->it_lock); | ||
1191 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | 1183 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); |
1192 | list_add(&mapping->list, &vm->freed); | 1184 | list_add(&mapping->list, &vm->freed); |
1193 | } | 1185 | } |
1194 | list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { | 1186 | list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { |
1195 | list_del(&mapping->list); | 1187 | list_del(&mapping->list); |
1188 | spin_lock(&vm->it_lock); | ||
1196 | interval_tree_remove(&mapping->it, &vm->va); | 1189 | interval_tree_remove(&mapping->it, &vm->va); |
1190 | spin_unlock(&vm->it_lock); | ||
1197 | kfree(mapping); | 1191 | kfree(mapping); |
1198 | } | 1192 | } |
1199 | 1193 | ||
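
Editor's sketch: every access to the vm->va interval tree is now bracketed by the new vm->it_lock spinlock, for insertion, removal and iteration alike. A small lookup sketch following the same discipline; the helper name is illustrative.

/* Sketch: it_lock-protected lookup in the per-VM interval tree, using the
 * locking rule introduced by this patch; helper name is illustrative. */
static struct amdgpu_bo_va_mapping *
amdgpu_vm_find_mapping(struct amdgpu_vm *vm, uint64_t saddr, uint64_t eaddr)
{
        struct interval_tree_node *it;

        spin_lock(&vm->it_lock);
        it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1);
        spin_unlock(&vm->it_lock);

        return it ? container_of(it, struct amdgpu_bo_va_mapping, it) : NULL;
}
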
@@ -1241,7 +1235,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1241 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 1235 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
1242 | vm->ids[i].id = 0; | 1236 | vm->ids[i].id = 0; |
1243 | vm->ids[i].flushed_updates = NULL; | 1237 | vm->ids[i].flushed_updates = NULL; |
1244 | vm->ids[i].last_id_use = NULL; | ||
1245 | } | 1238 | } |
1246 | mutex_init(&vm->mutex); | 1239 | mutex_init(&vm->mutex); |
1247 | vm->va = RB_ROOT; | 1240 | vm->va = RB_ROOT; |
@@ -1249,7 +1242,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1249 | INIT_LIST_HEAD(&vm->invalidated); | 1242 | INIT_LIST_HEAD(&vm->invalidated); |
1250 | INIT_LIST_HEAD(&vm->cleared); | 1243 | INIT_LIST_HEAD(&vm->cleared); |
1251 | INIT_LIST_HEAD(&vm->freed); | 1244 | INIT_LIST_HEAD(&vm->freed); |
1252 | 1245 | spin_lock_init(&vm->it_lock); | |
1253 | pd_size = amdgpu_vm_directory_size(adev); | 1246 | pd_size = amdgpu_vm_directory_size(adev); |
1254 | pd_entries = amdgpu_vm_num_pdes(adev); | 1247 | pd_entries = amdgpu_vm_num_pdes(adev); |
1255 | 1248 | ||
@@ -1269,8 +1262,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1269 | NULL, NULL, &vm->page_directory); | 1262 | NULL, NULL, &vm->page_directory); |
1270 | if (r) | 1263 | if (r) |
1271 | return r; | 1264 | return r; |
1272 | 1265 | r = amdgpu_bo_reserve(vm->page_directory, false); | |
1266 | if (r) { | ||
1267 | amdgpu_bo_unref(&vm->page_directory); | ||
1268 | vm->page_directory = NULL; | ||
1269 | return r; | ||
1270 | } | ||
1273 | r = amdgpu_vm_clear_bo(adev, vm->page_directory); | 1271 | r = amdgpu_vm_clear_bo(adev, vm->page_directory); |
1272 | amdgpu_bo_unreserve(vm->page_directory); | ||
1274 | if (r) { | 1273 | if (r) { |
1275 | amdgpu_bo_unref(&vm->page_directory); | 1274 | amdgpu_bo_unref(&vm->page_directory); |
1276 | vm->page_directory = NULL; | 1275 | vm->page_directory = NULL; |
@@ -1313,11 +1312,28 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1313 | 1312 | ||
1314 | amdgpu_bo_unref(&vm->page_directory); | 1313 | amdgpu_bo_unref(&vm->page_directory); |
1315 | fence_put(vm->page_directory_fence); | 1314 | fence_put(vm->page_directory_fence); |
1316 | |||
1317 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 1315 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
1316 | unsigned id = vm->ids[i].id; | ||
1317 | |||
1318 | atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner, | ||
1319 | (long)vm, 0); | ||
1318 | fence_put(vm->ids[i].flushed_updates); | 1320 | fence_put(vm->ids[i].flushed_updates); |
1319 | fence_put(vm->ids[i].last_id_use); | ||
1320 | } | 1321 | } |
1321 | 1322 | ||
1322 | mutex_destroy(&vm->mutex); | 1323 | mutex_destroy(&vm->mutex); |
1323 | } | 1324 | } |
1325 | |||
1326 | /** | ||
1327 | * amdgpu_vm_manager_fini - cleanup VM manager | ||
1328 | * | ||
1329 | * @adev: amdgpu_device pointer | ||
1330 | * | ||
1331 | * Cleanup the VM manager and free resources. | ||
1332 | */ | ||
1333 | void amdgpu_vm_manager_fini(struct amdgpu_device *adev) | ||
1334 | { | ||
1335 | unsigned i; | ||
1336 | |||
1337 | for (i = 0; i < AMDGPU_NUM_VM; ++i) | ||
1338 | fence_put(adev->vm_manager.ids[i].active); | ||
1339 | } | ||
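
Editor's sketch: the new amdgpu_vm_manager_fini() centralizes the per-VMID fence puts that the GMC blocks used to open-code. The gmc_v7_0 and gmc_v8_0 hunks below switch their sw_fini and suspend callbacks over to it; a hedged sketch of the resulting caller shape, with gmc_vX_0_* as placeholder names.

/* Sketch of the caller pattern after this patch; gmc_vX_0_* names are
 * placeholders for the asic-specific callbacks changed below. */
static int gmc_vX_0_sw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (adev->vm_manager.enabled) {
                amdgpu_vm_manager_fini(adev);   /* puts vm_manager.ids[i].active */
                gmc_vX_0_vm_fini(adev);         /* asic-specific VM teardown */
                adev->vm_manager.enabled = false;
        }

        /* remaining GMC teardown unchanged */
        return 0;
}
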
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index a1a35a5df8e7..57a2e347f04d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c | |||
@@ -6569,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, | |||
6569 | switch (state) { | 6569 | switch (state) { |
6570 | case AMDGPU_IRQ_STATE_DISABLE: | 6570 | case AMDGPU_IRQ_STATE_DISABLE: |
6571 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); | 6571 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); |
6572 | cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; | 6572 | cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; |
6573 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); | 6573 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); |
6574 | break; | 6574 | break; |
6575 | case AMDGPU_IRQ_STATE_ENABLE: | 6575 | case AMDGPU_IRQ_STATE_ENABLE: |
6576 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); | 6576 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); |
6577 | cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; | 6577 | cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; |
6578 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); | 6578 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); |
6579 | break; | 6579 | break; |
6580 | default: | 6580 | default: |
@@ -6586,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, | |||
6586 | switch (state) { | 6586 | switch (state) { |
6587 | case AMDGPU_IRQ_STATE_DISABLE: | 6587 | case AMDGPU_IRQ_STATE_DISABLE: |
6588 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); | 6588 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); |
6589 | cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; | 6589 | cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; |
6590 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); | 6590 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); |
6591 | break; | 6591 | break; |
6592 | case AMDGPU_IRQ_STATE_ENABLE: | 6592 | case AMDGPU_IRQ_STATE_ENABLE: |
6593 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); | 6593 | cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); |
6594 | cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; | 6594 | cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; |
6595 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); | 6595 | WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); |
6596 | break; | 6596 | break; |
6597 | default: | 6597 | default: |
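
Editor's sketch: the thermal interrupt change fixes inverted mask polarity. The THERM_INTH/INTL *_MASK bits suppress the interrupt, so AMDGPU_IRQ_STATE_DISABLE must set them and AMDGPU_IRQ_STATE_ENABLE must clear them, which is what the new code does for both thresholds. A condensed sketch of the corrected behaviour; the helper name is illustrative.

/* Sketch: corrected mask polarity for the CI thermal interrupt. Setting the
 * *_MASK bits masks (disables) the high/low threshold interrupts; clearing
 * them enables delivery. Helper name is illustrative. */
static void ci_thermal_int_set(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixCG_THERMAL_INT);

        if (enable)
                tmp &= ~(CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK |
                         CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK);
        else
                tmp |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK |
                       CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;

        WREG32_SMC(ixCG_THERMAL_INT, tmp);
}
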
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 6776cf756d40..e1dcab98e249 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -268,7 +268,6 @@ static const u32 fiji_mgcg_cgcg_init[] = | |||
268 | mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, | 268 | mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, |
269 | mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, | 269 | mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, |
270 | mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, | 270 | mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, |
271 | mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100, | ||
272 | mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, | 271 | mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, |
273 | mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, | 272 | mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, |
274 | mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, | 273 | mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, |
@@ -296,10 +295,6 @@ static const u32 fiji_mgcg_cgcg_init[] = | |||
296 | mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, | 295 | mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, |
297 | mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, | 296 | mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, |
298 | mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, | 297 | mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, |
299 | mmPCIE_INDEX, 0xffffffff, 0x0140001c, | ||
300 | mmPCIE_DATA, 0x000f0000, 0x00000000, | ||
301 | mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100, | ||
302 | mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104, | ||
303 | mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, | 298 | mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, |
304 | }; | 299 | }; |
305 | 300 | ||
@@ -1000,7 +995,7 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) | |||
1000 | adev->gfx.config.max_cu_per_sh = 16; | 995 | adev->gfx.config.max_cu_per_sh = 16; |
1001 | adev->gfx.config.max_sh_per_se = 1; | 996 | adev->gfx.config.max_sh_per_se = 1; |
1002 | adev->gfx.config.max_backends_per_se = 4; | 997 | adev->gfx.config.max_backends_per_se = 4; |
1003 | adev->gfx.config.max_texture_channel_caches = 8; | 998 | adev->gfx.config.max_texture_channel_caches = 16; |
1004 | adev->gfx.config.max_gprs = 256; | 999 | adev->gfx.config.max_gprs = 256; |
1005 | adev->gfx.config.max_gs_threads = 32; | 1000 | adev->gfx.config.max_gs_threads = 32; |
1006 | adev->gfx.config.max_hw_contexts = 8; | 1001 | adev->gfx.config.max_hw_contexts = 8; |
@@ -1613,6 +1608,296 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) | |||
1613 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); | 1608 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); |
1614 | } | 1609 | } |
1615 | case CHIP_FIJI: | 1610 | case CHIP_FIJI: |
1611 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | ||
1612 | switch (reg_offset) { | ||
1613 | case 0: | ||
1614 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1615 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1616 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | ||
1617 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1618 | break; | ||
1619 | case 1: | ||
1620 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1621 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1622 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | | ||
1623 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1624 | break; | ||
1625 | case 2: | ||
1626 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1627 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1628 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | | ||
1629 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1630 | break; | ||
1631 | case 3: | ||
1632 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1633 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1634 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | | ||
1635 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1636 | break; | ||
1637 | case 4: | ||
1638 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1639 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1640 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | | ||
1641 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1642 | break; | ||
1643 | case 5: | ||
1644 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | ||
1645 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1646 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | | ||
1647 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1648 | break; | ||
1649 | case 6: | ||
1650 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1651 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1652 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | | ||
1653 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1654 | break; | ||
1655 | case 7: | ||
1656 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1657 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | ||
1658 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | | ||
1659 | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); | ||
1660 | break; | ||
1661 | case 8: | ||
1662 | gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | | ||
1663 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); | ||
1664 | break; | ||
1665 | case 9: | ||
1666 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | ||
1667 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1668 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
1669 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | ||
1670 | break; | ||
1671 | case 10: | ||
1672 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1673 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1674 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
1675 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | ||
1676 | break; | ||
1677 | case 11: | ||
1678 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1679 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1680 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
1681 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | ||
1682 | break; | ||
1683 | case 12: | ||
1684 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1685 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | ||
1686 | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | | ||
1687 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | ||
1688 | break; | ||
1689 | case 13: | ||
1690 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | ||
1691 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1692 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | ||
1693 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | ||
1694 | break; | ||
1695 | case 14: | ||
1696 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1697 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1698 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | ||
1699 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | ||
1700 | break; | ||
1701 | case 15: | ||
1702 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | | ||
1703 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1704 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | ||
1705 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | ||
1706 | break; | ||
1707 | case 16: | ||
1708 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1709 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1710 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | ||
1711 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | ||
1712 | break; | ||
1713 | case 17: | ||
1714 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1715 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | ||
1716 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | ||
1717 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | ||
1718 | break; | ||
1719 | case 18: | ||
1720 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | ||
1721 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1722 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | ||
1723 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1724 | break; | ||
1725 | case 19: | ||
1726 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | | ||
1727 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1728 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | ||
1729 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1730 | break; | ||
1731 | case 20: | ||
1732 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | ||
1733 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1734 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | ||
1735 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1736 | break; | ||
1737 | case 21: | ||
1738 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | | ||
1739 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1740 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | ||
1741 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1742 | break; | ||
1743 | case 22: | ||
1744 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | | ||
1745 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1746 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | ||
1747 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1748 | break; | ||
1749 | case 23: | ||
1750 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | | ||
1751 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | ||
1752 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | ||
1753 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1754 | break; | ||
1755 | case 24: | ||
1756 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | | ||
1757 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1758 | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | | ||
1759 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1760 | break; | ||
1761 | case 25: | ||
1762 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | | ||
1763 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1764 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | ||
1765 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1766 | break; | ||
1767 | case 26: | ||
1768 | gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | | ||
1769 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1770 | MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | | ||
1771 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); | ||
1772 | break; | ||
1773 | case 27: | ||
1774 | gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | | ||
1775 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1776 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | ||
1777 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | ||
1778 | break; | ||
1779 | case 28: | ||
1780 | gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | ||
1781 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1782 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | ||
1783 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); | ||
1784 | break; | ||
1785 | case 29: | ||
1786 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1787 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | ||
1788 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | ||
1789 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | ||
1790 | break; | ||
1791 | case 30: | ||
1792 | gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | | ||
1793 | PIPE_CONFIG(ADDR_SURF_P4_16x16) | | ||
1794 | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | | ||
1795 | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); | ||
1796 | break; | ||
1797 | default: | ||
1798 | gb_tile_moden = 0; | ||
1799 | break; | ||
1800 | } | ||
1801 | adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; | ||
1802 | WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); | ||
1803 | } | ||
1804 | for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { | ||
1805 | switch (reg_offset) { | ||
1806 | case 0: | ||
1807 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1808 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1809 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1810 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1811 | break; | ||
1812 | case 1: | ||
1813 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1814 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1815 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1816 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1817 | break; | ||
1818 | case 2: | ||
1819 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1820 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1821 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1822 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1823 | break; | ||
1824 | case 3: | ||
1825 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1826 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1827 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1828 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1829 | break; | ||
1830 | case 4: | ||
1831 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1832 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1833 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1834 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1835 | break; | ||
1836 | case 5: | ||
1837 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1838 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1839 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1840 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1841 | break; | ||
1842 | case 6: | ||
1843 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1844 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1845 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1846 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1847 | break; | ||
1848 | case 8: | ||
1849 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1850 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | | ||
1851 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1852 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1853 | break; | ||
1854 | case 9: | ||
1855 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1856 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | | ||
1857 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1858 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1859 | break; | ||
1860 | case 10: | ||
1861 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1862 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1863 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1864 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1865 | break; | ||
1866 | case 11: | ||
1867 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1868 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1869 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1870 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1871 | break; | ||
1872 | case 12: | ||
1873 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1874 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | | ||
1875 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1876 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1877 | break; | ||
1878 | case 13: | ||
1879 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1880 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1881 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | | ||
1882 | NUM_BANKS(ADDR_SURF_8_BANK)); | ||
1883 | break; | ||
1884 | case 14: | ||
1885 | gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | | ||
1886 | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | | ||
1887 | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | | ||
1888 | NUM_BANKS(ADDR_SURF_4_BANK)); | ||
1889 | break; | ||
1890 | case 7: | ||
1891 | /* unused idx */ | ||
1892 | continue; | ||
1893 | default: | ||
1894 | gb_tile_moden = 0; | ||
1895 | break; | ||
1896 | } | ||
1897 | adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; | ||
1898 | WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); | ||
1899 | } | ||
1900 | break; | ||
1616 | case CHIP_TONGA: | 1901 | case CHIP_TONGA: |
1617 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { | 1902 | for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { |
1618 | switch (reg_offset) { | 1903 | switch (reg_offset) { |
@@ -2971,10 +3256,13 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2971 | amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); | 3256 | amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); |
2972 | switch (adev->asic_type) { | 3257 | switch (adev->asic_type) { |
2973 | case CHIP_TONGA: | 3258 | case CHIP_TONGA: |
2974 | case CHIP_FIJI: | ||
2975 | amdgpu_ring_write(ring, 0x16000012); | 3259 | amdgpu_ring_write(ring, 0x16000012); |
2976 | amdgpu_ring_write(ring, 0x0000002A); | 3260 | amdgpu_ring_write(ring, 0x0000002A); |
2977 | break; | 3261 | break; |
3262 | case CHIP_FIJI: | ||
3263 | amdgpu_ring_write(ring, 0x3a00161a); | ||
3264 | amdgpu_ring_write(ring, 0x0000002e); | ||
3265 | break; | ||
2978 | case CHIP_TOPAZ: | 3266 | case CHIP_TOPAZ: |
2979 | case CHIP_CARRIZO: | 3267 | case CHIP_CARRIZO: |
2980 | amdgpu_ring_write(ring, 0x00000002); | 3268 | amdgpu_ring_write(ring, 0x00000002); |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 85bbcdc73fff..7427d8cd4c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | |||
@@ -40,7 +40,7 @@ | |||
40 | static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); | 40 | static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); |
41 | static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); | 41 | static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); |
42 | 42 | ||
43 | MODULE_FIRMWARE("radeon/boniare_mc.bin"); | 43 | MODULE_FIRMWARE("radeon/bonaire_mc.bin"); |
44 | MODULE_FIRMWARE("radeon/hawaii_mc.bin"); | 44 | MODULE_FIRMWARE("radeon/hawaii_mc.bin"); |
45 | 45 | ||
46 | /** | 46 | /** |
@@ -501,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) | |||
501 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); | 501 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); |
502 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); | 502 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); |
503 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); | 503 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); |
504 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); | ||
504 | WREG32(mmVM_L2_CNTL, tmp); | 505 | WREG32(mmVM_L2_CNTL, tmp); |
505 | tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 506 | tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
506 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 507 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
@@ -960,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle) | |||
960 | 961 | ||
961 | static int gmc_v7_0_sw_fini(void *handle) | 962 | static int gmc_v7_0_sw_fini(void *handle) |
962 | { | 963 | { |
963 | int i; | ||
964 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 964 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
965 | 965 | ||
966 | if (adev->vm_manager.enabled) { | 966 | if (adev->vm_manager.enabled) { |
967 | for (i = 0; i < AMDGPU_NUM_VM; ++i) | 967 | amdgpu_vm_manager_fini(adev); |
968 | fence_put(adev->vm_manager.active[i]); | ||
969 | gmc_v7_0_vm_fini(adev); | 968 | gmc_v7_0_vm_fini(adev); |
970 | adev->vm_manager.enabled = false; | 969 | adev->vm_manager.enabled = false; |
971 | } | 970 | } |
@@ -1010,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle) | |||
1010 | 1009 | ||
1011 | static int gmc_v7_0_suspend(void *handle) | 1010 | static int gmc_v7_0_suspend(void *handle) |
1012 | { | 1011 | { |
1013 | int i; | ||
1014 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1012 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
1015 | 1013 | ||
1016 | if (adev->vm_manager.enabled) { | 1014 | if (adev->vm_manager.enabled) { |
1017 | for (i = 0; i < AMDGPU_NUM_VM; ++i) | 1015 | amdgpu_vm_manager_fini(adev); |
1018 | fence_put(adev->vm_manager.active[i]); | ||
1019 | gmc_v7_0_vm_fini(adev); | 1016 | gmc_v7_0_vm_fini(adev); |
1020 | adev->vm_manager.enabled = false; | 1017 | adev->vm_manager.enabled = false; |
1021 | } | 1018 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1bcc4e74e3b4..cb0e50ebb528 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | |||
@@ -629,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) | |||
629 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); | 629 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); |
630 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); | 630 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); |
631 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); | 631 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); |
632 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); | ||
632 | WREG32(mmVM_L2_CNTL, tmp); | 633 | WREG32(mmVM_L2_CNTL, tmp); |
633 | tmp = RREG32(mmVM_L2_CNTL2); | 634 | tmp = RREG32(mmVM_L2_CNTL2); |
634 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 635 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
@@ -979,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle) | |||
979 | 980 | ||
980 | static int gmc_v8_0_sw_fini(void *handle) | 981 | static int gmc_v8_0_sw_fini(void *handle) |
981 | { | 982 | { |
982 | int i; | ||
983 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 983 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
984 | 984 | ||
985 | if (adev->vm_manager.enabled) { | 985 | if (adev->vm_manager.enabled) { |
986 | for (i = 0; i < AMDGPU_NUM_VM; ++i) | 986 | amdgpu_vm_manager_fini(adev); |
987 | fence_put(adev->vm_manager.active[i]); | ||
988 | gmc_v8_0_vm_fini(adev); | 987 | gmc_v8_0_vm_fini(adev); |
989 | adev->vm_manager.enabled = false; | 988 | adev->vm_manager.enabled = false; |
990 | } | 989 | } |
@@ -1031,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle) | |||
1031 | 1030 | ||
1032 | static int gmc_v8_0_suspend(void *handle) | 1031 | static int gmc_v8_0_suspend(void *handle) |
1033 | { | 1032 | { |
1034 | int i; | ||
1035 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1033 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
1036 | 1034 | ||
1037 | if (adev->vm_manager.enabled) { | 1035 | if (adev->vm_manager.enabled) { |
1038 | for (i = 0; i < AMDGPU_NUM_VM; ++i) | 1036 | amdgpu_vm_manager_fini(adev); |
1039 | fence_put(adev->vm_manager.active[i]); | ||
1040 | gmc_v8_0_vm_fini(adev); | 1037 | gmc_v8_0_vm_fini(adev); |
1041 | adev->vm_manager.enabled = false; | 1038 | adev->vm_manager.enabled = false; |
1042 | } | 1039 | } |