35 files changed, 521 insertions(+), 381 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a5427cf4b19d..12e71bbfd222 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -96,6 +96,7 @@ extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fragment_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
 extern int amdgpu_vm_update_mode;
@@ -748,6 +749,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 struct amdgpu_fpriv {
 	struct amdgpu_vm vm;
 	struct amdgpu_bo_va *prt_va;
+	struct amdgpu_bo_va *csa_va;
 	struct mutex bo_list_lock;
 	struct idr bo_list_handles;
 	struct amdgpu_ctx_mgr ctx_mgr;
@@ -1482,9 +1484,6 @@ struct amdgpu_device {
 	struct amdgpu_mman mman;
 	struct amdgpu_vram_scratch vram_scratch;
 	struct amdgpu_wb wb;
-	atomic64_t vram_usage;
-	atomic64_t vram_vis_usage;
-	atomic64_t gtt_usage;
 	atomic64_t num_bytes_moved;
 	atomic64_t num_evictions;
 	atomic64_t num_vram_cpu_page_faults;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c05479ec825a..15d4a28d73bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -246,7 +246,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
-	used_vram = atomic64_read(&adev->vram_usage);
+	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
 	spin_lock(&adev->mm_stats.lock);
@@ -292,7 +292,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	/* Do the same for visible VRAM if half of it is free */
 	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
 		u64 total_vis_vram = adev->mc.visible_vram_size;
-		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+		u64 used_vis_vram =
+			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 
 		if (used_vis_vram < total_vis_vram) {
 			u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -673,10 +674,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 error_validate:
-	if (r) {
-		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
+	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-	}
 
 error_free_pages:
 
@@ -724,21 +723,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
  * If error is set than unvalidate buffer, otherwise just free memory
  * used by parsing context.
  **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
+				  bool backoff)
 {
-	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	unsigned i;
 
-	if (!error) {
-		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
+	if (!error)
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->fence);
-	} else if (backoff) {
+	else if (backoff)
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
-	}
 
 	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
 		drm_syncobj_put(parser->post_dep_syncobjs[i]);
@@ -791,7 +787,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	if (amdgpu_sriov_vf(adev)) {
 		struct dma_fence *f;
-		bo_va = vm->csa_bo_va;
+
+		bo_va = fpriv->csa_va;
 		BUG_ON(!bo_va);
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		if (r)
@@ -828,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);
+	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
@@ -1490,7 +1487,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		    addr > mapping->last)
 			continue;
 
-		*bo = lobj->bo_va->bo;
+		*bo = lobj->bo_va->base.bo;
 		return mapping;
 	}
 
@@ -1499,7 +1496,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		    addr > mapping->last)
 			continue;
 
-		*bo = lobj->bo_va->bo;
+		*bo = lobj->bo_va->base.bo;
 		return mapping;
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a6f6cb0f2e02..1a459ac63df4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1076,6 +1076,13 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
 		amdgpu_gtt_size = -1;
 	}
 
+	/* valid range is between 4 and 9 inclusive */
+	if (amdgpu_vm_fragment_size != -1 &&
+	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
+		dev_warn(adev->dev, "valid range is between 4 and 9\n");
+		amdgpu_vm_fragment_size = -1;
+	}
+
 	amdgpu_check_vm_size(adev);
 
 	amdgpu_check_block_size(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5e9ce8a29669..e39ec981b11c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -68,9 +68,10 @@
  * - 3.16.0 - Add reserved vmid support
  * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
  * - 3.18.0 - Export gpu always on cu bitmap
+ * - 3.19.0 - Add support for UVD MJPEG decode
  */
 #define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 18
+#define KMS_DRIVER_MINOR 19
 #define KMS_DRIVER_PATCHLEVEL 0
 
 int amdgpu_vram_limit = 0;
@@ -94,6 +95,7 @@ unsigned amdgpu_ip_block_mask = 0xffffffff;
 int amdgpu_bapm = -1;
 int amdgpu_deep_color = 0;
 int amdgpu_vm_size = -1;
+int amdgpu_vm_fragment_size = -1;
 int amdgpu_vm_block_size = -1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
@@ -183,6 +185,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 0444);
 MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
 module_param_named(vm_size, amdgpu_vm_size, int, 0444);
 
+MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
+module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
+
 MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
 module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 81127ffcefb2..7171968f261e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -225,9 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-				      AMDGPU_GEM_CREATE_VRAM_CLEARED|
-				      AMDGPU_GEM_CREATE_SHADOW |
-				      AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
+				      AMDGPU_GEM_CREATE_VRAM_CLEARED))
 		return -EINVAL;
 
 	/* reject invalid gem domains */
@@ -623,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->operation) {
 	case AMDGPU_VA_OP_MAP:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
 					args->map_size);
 		if (r)
 			goto error_backoff;
@@ -643,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 					args->map_size);
 		break;
 	case AMDGPU_VA_OP_REPLACE:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
 					args->map_size);
 		if (r)
 			goto error_backoff;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 5e6b90c6794f..9e05e257729f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -28,7 +28,7 @@
 struct amdgpu_gtt_mgr {
 	struct drm_mm mm;
 	spinlock_t lock;
-	uint64_t available;
+	atomic64_t available;
 };
 
 /**
@@ -54,7 +54,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
 	size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
 	drm_mm_init(&mgr->mm, start, size);
 	spin_lock_init(&mgr->lock);
-	mgr->available = p_size;
+	atomic64_set(&mgr->available, p_size);
 	man->priv = mgr;
 	return 0;
 }
@@ -153,15 +153,6 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 	return r;
 }
 
-void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man)
-{
-	struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
-	struct amdgpu_gtt_mgr *mgr = man->priv;
-
-	seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
-		   man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20);
-
-}
 /**
  * amdgpu_gtt_mgr_new - allocate a new node
  *
@@ -182,11 +173,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
 	int r;
 
 	spin_lock(&mgr->lock);
-	if (mgr->available < mem->num_pages) {
+	if (atomic64_read(&mgr->available) < mem->num_pages) {
 		spin_unlock(&mgr->lock);
 		return 0;
 	}
-	mgr->available -= mem->num_pages;
+	atomic64_sub(mem->num_pages, &mgr->available);
 	spin_unlock(&mgr->lock);
 
 	node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -213,9 +204,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
 
 	return 0;
 err_out:
-	spin_lock(&mgr->lock);
-	mgr->available += mem->num_pages;
-	spin_unlock(&mgr->lock);
+	atomic64_add(mem->num_pages, &mgr->available);
 
 	return r;
 }
@@ -242,30 +231,47 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man,
 	spin_lock(&mgr->lock);
 	if (node->start != AMDGPU_BO_INVALID_OFFSET)
 		drm_mm_remove_node(node);
-	mgr->available += mem->num_pages;
 	spin_unlock(&mgr->lock);
+	atomic64_add(mem->num_pages, &mgr->available);
 
 	kfree(node);
 	mem->mm_node = NULL;
 }
 
 /**
+ * amdgpu_gtt_mgr_usage - return usage of GTT domain
+ *
+ * @man: TTM memory type manager
+ *
+ * Return how many bytes are used in the GTT domain
+ */
+uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man)
+{
+	struct amdgpu_gtt_mgr *mgr = man->priv;
+
+	return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE;
+}
+
+/**
  * amdgpu_gtt_mgr_debug - dump VRAM table
  *
  * @man: TTM memory type manager
- * @prefix: text prefix
+ * @printer: DRM printer to use
 *
 * Dump the table content using printk.
 */
 static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man,
-				 const char *prefix)
+				 struct drm_printer *printer)
 {
 	struct amdgpu_gtt_mgr *mgr = man->priv;
-	struct drm_printer p = drm_debug_printer(prefix);
 
 	spin_lock(&mgr->lock);
-	drm_mm_print(&mgr->mm, &p);
+	drm_mm_print(&mgr->mm, printer);
 	spin_unlock(&mgr->lock);
+
+	drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
+		   man->size, (u64)atomic64_read(&mgr->available),
+		   amdgpu_gtt_mgr_usage(man) >> 20);
 }
 
 const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index c908f972283c..e16229000a98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -455,13 +455,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_VRAM_USAGE:
-		ui64 = atomic64_read(&adev->vram_usage);
+		ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_VIS_VRAM_USAGE:
-		ui64 = atomic64_read(&adev->vram_vis_usage);
+		ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_GTT_USAGE:
-		ui64 = atomic64_read(&adev->gtt_usage);
+		ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_GDS_CONFIG: {
 		struct drm_amdgpu_info_gds gds_info;
@@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		mem.vram.total_heap_size = adev->mc.real_vram_size;
 		mem.vram.usable_heap_size =
 			adev->mc.real_vram_size - adev->vram_pin_size;
-		mem.vram.heap_usage = atomic64_read(&adev->vram_usage);
+		mem.vram.heap_usage =
+			amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 
 		mem.cpu_accessible_vram.total_heap_size =
@@ -506,7 +507,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			adev->mc.visible_vram_size -
 			(adev->vram_pin_size - adev->invisible_pin_size);
 		mem.cpu_accessible_vram.heap_usage =
-			atomic64_read(&adev->vram_vis_usage);
+			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		mem.cpu_accessible_vram.max_allocation =
 			mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
 
@@ -514,7 +515,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		mem.gtt.total_heap_size *= PAGE_SIZE;
 		mem.gtt.usable_heap_size = mem.gtt.total_heap_size
 			- adev->gart_pin_size;
-		mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage);
+		mem.gtt.heap_usage =
+			amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
 		mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
 
 		return copy_to_user(out, &mem,
@@ -588,11 +590,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
 		dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
-		dev_info.pte_fragment_size =
-			(1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
-			AMDGPU_GPU_PAGE_SIZE;
+		dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
 		dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
-
 		dev_info.cu_active_number = adev->gfx.cu_info.number;
 		dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
 		dev_info.ce_ram_size = adev->gfx.ce_ram_size;
@@ -841,7 +840,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
-		r = amdgpu_map_static_csa(adev, &fpriv->vm);
+		r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
 		if (r)
 			goto out_suspend;
 	}
@@ -894,8 +893,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	if (amdgpu_sriov_vf(adev)) {
 		/* TODO: how to handle reserve failure */
 		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
-		amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
-		fpriv->vm.csa_bo_va = NULL;
+		amdgpu_vm_bo_rmv(adev, fpriv->csa_va);
+		fpriv->csa_va = NULL;
 		amdgpu_bo_unreserve(adev->virt.csa_obj);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6e72fe7901ec..e7e899190bef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -37,55 +37,6 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-
-
-static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
-				    struct ttm_mem_reg *mem)
-{
-	if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
-		return 0;
-
-	return ((mem->start << PAGE_SHIFT) + mem->size) >
-		adev->mc.visible_vram_size ?
-		adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
-		mem->size;
-}
-
-static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
-				       struct ttm_mem_reg *old_mem,
-				       struct ttm_mem_reg *new_mem)
-{
-	u64 vis_size;
-	if (!adev)
-		return;
-
-	if (new_mem) {
-		switch (new_mem->mem_type) {
-		case TTM_PL_TT:
-			atomic64_add(new_mem->size, &adev->gtt_usage);
-			break;
-		case TTM_PL_VRAM:
-			atomic64_add(new_mem->size, &adev->vram_usage);
-			vis_size = amdgpu_get_vis_part_size(adev, new_mem);
-			atomic64_add(vis_size, &adev->vram_vis_usage);
-			break;
-		}
-	}
-
-	if (old_mem) {
-		switch (old_mem->mem_type) {
-		case TTM_PL_TT:
-			atomic64_sub(old_mem->size, &adev->gtt_usage);
-			break;
-		case TTM_PL_VRAM:
-			atomic64_sub(old_mem->size, &adev->vram_usage);
-			vis_size = amdgpu_get_vis_part_size(adev, old_mem);
-			atomic64_sub(vis_size, &adev->vram_vis_usage);
-			break;
-		}
-	}
-}
-
 static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
@@ -94,7 +45,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	bo = container_of(tbo, struct amdgpu_bo, tbo);
 
 	amdgpu_bo_kunmap(bo);
-	amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
 
 	drm_gem_object_release(&bo->gem_base);
 	amdgpu_bo_unref(&bo->parent);
@@ -992,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 		return;
 
 	/* move_notify is called before move happens */
-	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
-
 	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9b7b4fcb047b..a288fa6d72c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -33,6 +33,7 @@
 
 #define AMDGPU_BO_INVALID_OFFSET LONG_MAX
 
+/* bo virtual addresses in a vm */
 struct amdgpu_bo_va_mapping {
 	struct list_head list;
 	struct rb_node rb;
@@ -43,26 +44,19 @@ struct amdgpu_bo_va_mapping {
 	uint64_t flags;
 };
 
-/* bo virtual addresses in a specific vm */
+/* User space allocated BO in a VM */
 struct amdgpu_bo_va {
+	struct amdgpu_vm_bo_base base;
+
 	/* protected by bo being reserved */
-	struct list_head bo_list;
 	struct dma_fence *last_pt_update;
 	unsigned ref_count;
 
-	/* protected by vm mutex and spinlock */
-	struct list_head vm_status;
-
 	/* mappings for this bo_va */
 	struct list_head invalids;
 	struct list_head valids;
-
-	/* constant after initialization */
-	struct amdgpu_vm *vm;
-	struct amdgpu_bo *bo;
 };
 
-
 struct amdgpu_bo {
 	/* Protected by tbo.reserved */
 	u32 preferred_domains;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a6899180b265..c586f44312f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 		struct dma_fence *f = e->fence;
 		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
 
+		if (dma_fence_is_signaled(f)) {
+			hash_del(&e->node);
+			dma_fence_put(f);
+			kmem_cache_free(amdgpu_sync_slab, e);
+			continue;
+		}
 		if (ring && s_fence) {
 			/* For fences from the same ring it is sufficient
 			 * when they are scheduled.
@@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 			}
 		}
 
-		if (dma_fence_is_signaled(f)) {
-			hash_del(&e->node);
-			dma_fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
 		return f;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 9ab58245e518..1c88bd5e29ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,62 @@
 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
 	job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
 
+TRACE_EVENT(amdgpu_ttm_tt_populate,
+	    TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address),
+	    TP_ARGS(adev, dma_address, phys_address),
+	    TP_STRUCT__entry(
+			     __field(uint16_t, domain)
+			     __field(uint8_t, bus)
+			     __field(uint8_t, slot)
+			     __field(uint8_t, func)
+			     __field(uint64_t, dma)
+			     __field(uint64_t, phys)
+			     ),
+	    TP_fast_assign(
+			   __entry->domain = pci_domain_nr(adev->pdev->bus);
+			   __entry->bus = adev->pdev->bus->number;
+			   __entry->slot = PCI_SLOT(adev->pdev->devfn);
+			   __entry->func = PCI_FUNC(adev->pdev->devfn);
+			   __entry->dma = dma_address;
+			   __entry->phys = phys_address;
+			   ),
+	    TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+		      (unsigned)__entry->domain,
+		      (unsigned)__entry->bus,
+		      (unsigned)__entry->slot,
+		      (unsigned)__entry->func,
+		      (unsigned long long)__entry->dma,
+		      (unsigned long long)__entry->phys)
+);
+
+TRACE_EVENT(amdgpu_ttm_tt_unpopulate,
+	    TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address),
+	    TP_ARGS(adev, dma_address, phys_address),
+	    TP_STRUCT__entry(
+			     __field(uint16_t, domain)
+			     __field(uint8_t, bus)
+			     __field(uint8_t, slot)
+			     __field(uint8_t, func)
+			     __field(uint64_t, dma)
+			     __field(uint64_t, phys)
+			     ),
+	    TP_fast_assign(
+			   __entry->domain = pci_domain_nr(adev->pdev->bus);
+			   __entry->bus = adev->pdev->bus->number;
+			   __entry->slot = PCI_SLOT(adev->pdev->devfn);
+			   __entry->func = PCI_FUNC(adev->pdev->devfn);
+			   __entry->dma = dma_address;
+			   __entry->phys = phys_address;
+			   ),
+	    TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+		      (unsigned)__entry->domain,
+		      (unsigned)__entry->bus,
+		      (unsigned)__entry->slot,
+		      (unsigned)__entry->func,
+		      (unsigned long long)__entry->dma,
+		      (unsigned long long)__entry->phys)
+);
+
 TRACE_EVENT(amdgpu_mm_rreg,
 	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
 	    TP_ARGS(did, reg, value),
@@ -228,7 +284,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
 	    ),
 
 	    TP_fast_assign(
-			   __entry->bo = bo_va ? bo_va->bo : NULL;
+			   __entry->bo = bo_va ? bo_va->base.bo : NULL;
 			   __entry->start = mapping->start;
 			   __entry->last = mapping->last;
 			   __entry->offset = mapping->offset;
@@ -252,7 +308,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
 	    ),
 
 	    TP_fast_assign(
-			   __entry->bo = bo_va->bo;
+			   __entry->bo = bo_va->base.bo;
 			   __entry->start = mapping->start;
 			   __entry->last = mapping->last;
 			   __entry->offset = mapping->offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c803b082324d..8b2c294f6f79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -662,6 +663,38 @@ release_pages:
 	return r;
 }
 
+static void amdgpu_trace_dma_map(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned i;
+
+	if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
+		for (i = 0; i < ttm->num_pages; i++) {
+			trace_amdgpu_ttm_tt_populate(
+				adev,
+				gtt->ttm.dma_address[i],
+				page_to_phys(ttm->pages[i]));
+		}
+	}
+}
+
+static void amdgpu_trace_dma_unmap(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned i;
+
+	if (unlikely(trace_amdgpu_ttm_tt_unpopulate_enabled())) {
+		for (i = 0; i < ttm->num_pages; i++) {
+			trace_amdgpu_ttm_tt_unpopulate(
+				adev,
+				gtt->ttm.dma_address[i],
+				page_to_phys(ttm->pages[i]));
+		}
+	}
+}
+
 /* prepare the sg table with the user pages */
 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
@@ -688,6 +721,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 					 gtt->ttm.dma_address, ttm->num_pages);
 
+	amdgpu_trace_dma_map(ttm);
+
 	return 0;
 
 release_sg:
@@ -721,6 +756,8 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 		put_page(page);
 	}
 
+	amdgpu_trace_dma_unmap(ttm);
+
 	sg_free_table(ttm->sg);
 }
 
@@ -892,7 +929,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
 
 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned i;
 	int r;
@@ -915,14 +952,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 						 gtt->ttm.dma_address, ttm->num_pages);
 		ttm->state = tt_unbound;
-		return 0;
+		r = 0;
+		goto trace_mappings;
 	}
 
-	adev = amdgpu_ttm_adev(ttm->bdev);
-
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
-		return ttm_dma_populate(&gtt->ttm, adev->dev);
+		r = ttm_dma_populate(&gtt->ttm, adev->dev);
+		goto trace_mappings;
 	}
 #endif
 
@@ -945,7 +982,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 			return -EFAULT;
 		}
 	}
-	return 0;
+
+	r = 0;
+trace_mappings:
+	if (likely(!r))
+		amdgpu_trace_dma_map(ttm);
+	return r;
 }
 
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -966,6 +1008,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 
 	adev = amdgpu_ttm_adev(ttm->bdev);
 
+	amdgpu_trace_dma_unmap(ttm);
+
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
 		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
@@ -1597,32 +1641,16 @@ error_free:
 
 #if defined(CONFIG_DEBUG_FS)
 
-extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager
-				 *man);
 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	unsigned ttm_pl = *(int *)node->info_ent->data;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv;
-	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	spin_lock(&glob->lru_lock);
-	drm_mm_print(mm, &p);
-	spin_unlock(&glob->lru_lock);
-	switch (ttm_pl) {
-	case TTM_PL_VRAM:
-		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
-			   adev->mman.bdev.man[ttm_pl].size,
-			   (u64)atomic64_read(&adev->vram_usage) >> 20,
-			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
-		break;
-	case TTM_PL_TT:
-		amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
-		break;
-	}
+	man->func->debug(man, &p);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 0e2399f32de7..f22a4758719d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -66,6 +66,10 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *tbo,
 			 const struct ttm_place *place,
 			 struct ttm_mem_reg *mem);
+uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
+
+uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
+uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       uint64_t dst_offset, uint32_t byte_count,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index aefecf6c1e7b..e19928dae8e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -588,6 +588,10 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 		}
 		break;
 
+	case 8: /* MJPEG */
+		min_dpb_size = 0;
+		break;
+
 	case 16: /* H265 */
 		image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
 		image_size = ALIGN(image_size, 256);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8a081e162d13..ab05121b9272 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -46,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
  * address within META_DATA init package to support SRIOV gfx preemption.
  */
 
-int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct amdgpu_bo_va **bo_va)
 {
-	int r;
-	struct amdgpu_bo_va *bo_va;
 	struct ww_acquire_ctx ticket;
 	struct list_head list;
 	struct amdgpu_bo_list_entry pd;
 	struct ttm_validate_buffer csa_tv;
+	int r;
 
 	INIT_LIST_HEAD(&list);
 	INIT_LIST_HEAD(&csa_tv.head);
@@ -69,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		return r;
 	}
 
-	bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
-	if (!bo_va) {
+	*bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
+	if (!*bo_va) {
 		ttm_eu_backoff_reservation(&ticket, &list);
 		DRM_ERROR("failed to create bo_va for static CSA\n");
 		return -ENOMEM;
 	}
 
-	r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR,
+	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
 				AMDGPU_CSA_SIZE);
 	if (r) {
 		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, bo_va);
+		amdgpu_vm_bo_rmv(adev, *bo_va);
 		ttm_eu_backoff_reservation(&ticket, &list);
 		return r;
 	}
 
-	r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE,
+	r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 			     AMDGPU_PTE_EXECUTABLE);
 
 	if (r) {
 		DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, bo_va);
+		amdgpu_vm_bo_rmv(adev, *bo_va);
 		ttm_eu_backoff_reservation(&ticket, &list);
 		return r;
 	}
 
-	vm->csa_bo_va = bo_va;
 	ttm_eu_backoff_reservation(&ticket, &list);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index e5b1baf387c1..afcfb8bcfb65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -90,7 +90,8 @@ static inline bool is_virtual_machine(void)
 
 struct amdgpu_vm;
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
-int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct amdgpu_bo_va **bo_va);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ce36652029e..6b1343e5541d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -159,11 +159,20 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
159 | */ | 159 | */ |
160 | static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, | 160 | static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, |
161 | int (*validate)(void *, struct amdgpu_bo *), | 161 | int (*validate)(void *, struct amdgpu_bo *), |
162 | void *param, bool use_cpu_for_update) | 162 | void *param, bool use_cpu_for_update, |
163 | struct ttm_bo_global *glob) | ||
163 | { | 164 | { |
164 | unsigned i; | 165 | unsigned i; |
165 | int r; | 166 | int r; |
166 | 167 | ||
168 | if (parent->bo->shadow) { | ||
169 | struct amdgpu_bo *shadow = parent->bo->shadow; | ||
170 | |||
171 | r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); | ||
172 | if (r) | ||
173 | return r; | ||
174 | } | ||
175 | |||
167 | if (use_cpu_for_update) { | 176 | if (use_cpu_for_update) { |
168 | r = amdgpu_bo_kmap(parent->bo, NULL); | 177 | r = amdgpu_bo_kmap(parent->bo, NULL); |
169 | if (r) | 178 | if (r) |
@@ -183,12 +192,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, | |||
183 | if (r) | 192 | if (r) |
184 | return r; | 193 | return r; |
185 | 194 | ||
195 | spin_lock(&glob->lru_lock); | ||
196 | ttm_bo_move_to_lru_tail(&entry->bo->tbo); | ||
197 | if (entry->bo->shadow) | ||
198 | ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); | ||
199 | spin_unlock(&glob->lru_lock); | ||
200 | |||
186 | /* | 201 | /* |
187 | * Recurse into the sub directory. This is harmless because we | 202 | * Recurse into the sub directory. This is harmless because we |
188 | * have only a maximum of 5 layers. | 203 | * have only a maximum of 5 layers. |
189 | */ | 204 | */ |
190 | r = amdgpu_vm_validate_level(entry, validate, param, | 205 | r = amdgpu_vm_validate_level(entry, validate, param, |
191 | use_cpu_for_update); | 206 | use_cpu_for_update, glob); |
192 | if (r) | 207 | if (r) |
193 | return r; | 208 | return r; |
194 | } | 209 | } |
@@ -220,54 +235,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
220 | return 0; | 235 | return 0; |
221 | 236 | ||
222 | return amdgpu_vm_validate_level(&vm->root, validate, param, | 237 | return amdgpu_vm_validate_level(&vm->root, validate, param, |
223 | vm->use_cpu_for_update); | 238 | vm->use_cpu_for_update, |
239 | adev->mman.bdev.glob); | ||
224 | } | 240 | } |
225 | 241 | ||
226 | /** | 242 | /** |
227 | * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail | ||
228 | * | ||
229 | * @adev: amdgpu device instance | ||
230 | * @vm: vm providing the BOs | ||
231 | * | ||
232 | * Move the PT BOs to the tail of the LRU. | ||
233 | */ | ||
234 | static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) | ||
235 | { | ||
236 | unsigned i; | ||
237 | |||
238 | if (!parent->entries) | ||
239 | return; | ||
240 | |||
241 | for (i = 0; i <= parent->last_entry_used; ++i) { | ||
242 | struct amdgpu_vm_pt *entry = &parent->entries[i]; | ||
243 | |||
244 | if (!entry->bo) | ||
245 | continue; | ||
246 | |||
247 | ttm_bo_move_to_lru_tail(&entry->bo->tbo); | ||
248 | amdgpu_vm_move_level_in_lru(entry); | ||
249 | } | ||
250 | } | ||
251 | |||
252 | /** | ||
253 | * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail | ||
254 | * | ||
255 | * @adev: amdgpu device instance | ||
256 | * @vm: vm providing the BOs | ||
257 | * | ||
258 | * Move the PT BOs to the tail of the LRU. | ||
259 | */ | ||
260 | void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, | ||
261 | struct amdgpu_vm *vm) | ||
262 | { | ||
263 | struct ttm_bo_global *glob = adev->mman.bdev.glob; | ||
264 | |||
265 | spin_lock(&glob->lru_lock); | ||
266 | amdgpu_vm_move_level_in_lru(&vm->root); | ||
267 | spin_unlock(&glob->lru_lock); | ||
268 | } | ||
269 | |||
270 | /** | ||
271 | * amdgpu_vm_alloc_levels - allocate the PD/PT levels | 243 | * amdgpu_vm_alloc_levels - allocate the PD/PT levels |
272 | * | 244 | * |
273 | * @adev: amdgpu_device pointer | 245 | * @adev: amdgpu_device pointer |
@@ -359,7 +331,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
359 | 331 | ||
360 | entry->bo = pt; | 332 | entry->bo = pt; |
361 | entry->addr = 0; | 333 | entry->addr = 0; |
362 | entry->huge_page = false; | ||
363 | } | 334 | } |
364 | 335 | ||
365 | if (level < adev->vm_manager.num_level) { | 336 | if (level < adev->vm_manager.num_level) { |
@@ -899,8 +870,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, | |||
899 | { | 870 | { |
900 | struct amdgpu_bo_va *bo_va; | 871 | struct amdgpu_bo_va *bo_va; |
901 | 872 | ||
902 | list_for_each_entry(bo_va, &bo->va, bo_list) { | 873 | list_for_each_entry(bo_va, &bo->va, base.bo_list) { |
903 | if (bo_va->vm == vm) { | 874 | if (bo_va->base.vm == vm) { |
904 | return bo_va; | 875 | return bo_va; |
905 | } | 876 | } |
906 | } | 877 | } |
@@ -1074,11 +1045,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1074 | 1045 | ||
1075 | params.func = amdgpu_vm_cpu_set_ptes; | 1046 | params.func = amdgpu_vm_cpu_set_ptes; |
1076 | } else { | 1047 | } else { |
1077 | if (shadow) { | ||
1078 | r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); | ||
1079 | if (r) | ||
1080 | return r; | ||
1081 | } | ||
1082 | ring = container_of(vm->entity.sched, struct amdgpu_ring, | 1048 | ring = container_of(vm->entity.sched, struct amdgpu_ring, |
1083 | sched); | 1049 | sched); |
1084 | 1050 | ||
@@ -1114,22 +1080,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1114 | if (bo == NULL) | 1080 | if (bo == NULL) |
1115 | continue; | 1081 | continue; |
1116 | 1082 | ||
1117 | if (bo->shadow) { | ||
1118 | struct amdgpu_bo *pt_shadow = bo->shadow; | ||
1119 | |||
1120 | r = amdgpu_ttm_bind(&pt_shadow->tbo, | ||
1121 | &pt_shadow->tbo.mem); | ||
1122 | if (r) | ||
1123 | return r; | ||
1124 | } | ||
1125 | |||
1126 | pt = amdgpu_bo_gpu_offset(bo); | 1083 | pt = amdgpu_bo_gpu_offset(bo); |
1127 | pt = amdgpu_gart_get_vm_pde(adev, pt); | 1084 | pt = amdgpu_gart_get_vm_pde(adev, pt); |
1128 | if (parent->entries[pt_idx].addr == pt || | 1085 | /* Don't update huge pages here */ |
1129 | parent->entries[pt_idx].huge_page) | 1086 | if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || |
1087 | parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) | ||
1130 | continue; | 1088 | continue; |
1131 | 1089 | ||
1132 | parent->entries[pt_idx].addr = pt; | 1090 | parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; |
1133 | 1091 | ||
1134 | pde = pd_addr + pt_idx * 8; | 1092 | pde = pd_addr + pt_idx * 8; |
1135 | if (((last_pde + 8 * count) != pde) || | 1093 | if (((last_pde + 8 * count) != pde) || |
@@ -1307,15 +1265,14 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, | |||
1307 | * | 1265 | * |
1308 | * Check if we can update the PD with a huge page. | 1266 | * Check if we can update the PD with a huge page. |
1309 | */ | 1267 | */ |
1310 | static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, | 1268 | static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, |
1311 | struct amdgpu_vm_pt *entry, | 1269 | struct amdgpu_vm_pt *entry, |
1312 | struct amdgpu_vm_pt *parent, | 1270 | struct amdgpu_vm_pt *parent, |
1313 | unsigned nptes, uint64_t dst, | 1271 | unsigned nptes, uint64_t dst, |
1314 | uint64_t flags) | 1272 | uint64_t flags) |
1315 | { | 1273 | { |
1316 | bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); | 1274 | bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); |
1317 | uint64_t pd_addr, pde; | 1275 | uint64_t pd_addr, pde; |
1318 | int r; | ||
1319 | 1276 | ||
1320 | /* In the case of a mixed PT the PDE must point to it*/ | 1277 | /* In the case of a mixed PT the PDE must point to it*/ |
1321 | if (p->adev->asic_type < CHIP_VEGA10 || | 1278 | if (p->adev->asic_type < CHIP_VEGA10 || |
@@ -1327,21 +1284,17 @@ static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, | |||
1327 | dst = amdgpu_gart_get_vm_pde(p->adev, dst); | 1284 | dst = amdgpu_gart_get_vm_pde(p->adev, dst); |
1328 | flags = AMDGPU_PTE_VALID; | 1285 | flags = AMDGPU_PTE_VALID; |
1329 | } else { | 1286 | } else { |
1287 | /* Set the huge page flag to stop scanning at this PDE */ | ||
1330 | flags |= AMDGPU_PDE_PTE; | 1288 | flags |= AMDGPU_PDE_PTE; |
1331 | } | 1289 | } |
1332 | 1290 | ||
1333 | if (entry->addr == dst && | 1291 | if (entry->addr == (dst | flags)) |
1334 | entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) | 1292 | return; |
1335 | return 0; | ||
1336 | 1293 | ||
1337 | entry->addr = dst; | 1294 | entry->addr = (dst | flags); |
1338 | entry->huge_page = !!(flags & AMDGPU_PDE_PTE); | ||
1339 | 1295 | ||
1340 | if (use_cpu_update) { | 1296 | if (use_cpu_update) { |
1341 | r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr); | 1297 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); |
1342 | if (r) | ||
1343 | return r; | ||
1344 | |||
1345 | pde = pd_addr + (entry - parent->entries) * 8; | 1298 | pde = pd_addr + (entry - parent->entries) * 8; |
1346 | amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); | 1299 | amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); |
1347 | } else { | 1300 | } else { |
@@ -1354,8 +1307,6 @@ static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, | |||
1354 | pde = pd_addr + (entry - parent->entries) * 8; | 1307 | pde = pd_addr + (entry - parent->entries) * 8; |
1355 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); | 1308 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); |
1356 | } | 1309 | } |
1357 | |||
1358 | return 0; | ||
1359 | } | 1310 | } |
1360 | 1311 | ||
1361 | /** | 1312 | /** |
@@ -1382,7 +1333,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
1382 | struct amdgpu_bo *pt; | 1333 | struct amdgpu_bo *pt; |
1383 | unsigned nptes; | 1334 | unsigned nptes; |
1384 | bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); | 1335 | bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); |
1385 | int r; | ||
1386 | 1336 | ||
1387 | /* walk over the address space and update the page tables */ | 1337 | /* walk over the address space and update the page tables */ |
1388 | for (addr = start; addr < end; addr += nptes, | 1338 | for (addr = start; addr < end; addr += nptes, |
@@ -1398,12 +1348,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
1398 | else | 1348 | else |
1399 | nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); | 1349 | nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); |
1400 | 1350 | ||
1401 | r = amdgpu_vm_handle_huge_pages(params, entry, parent, | 1351 | amdgpu_vm_handle_huge_pages(params, entry, parent, |
1402 | nptes, dst, flags); | 1352 | nptes, dst, flags); |
1403 | if (r) | 1353 | /* We don't need to update PTEs for huge pages */ |
1404 | return r; | 1354 | if (entry->addr & AMDGPU_PDE_PTE) |
1405 | |||
1406 | if (entry->huge_page) | ||
1407 | continue; | 1355 | continue; |
1408 | 1356 | ||
1409 | pt = entry->bo; | 1357 | pt = entry->bo; |
@@ -1462,9 +1410,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1462 | * Userspace can support this by aligning virtual base address and | 1410 | * Userspace can support this by aligning virtual base address and |
1463 | * allocation size to the fragment size. | 1411 | * allocation size to the fragment size. |
1464 | */ | 1412 | */ |
1465 | 1413 | unsigned pages_per_frag = params->adev->vm_manager.fragment_size; | |
1466 | /* SI and newer are optimized for 64KB */ | ||
1467 | unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev); | ||
1468 | uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); | 1414 | uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); |
1469 | uint64_t frag_align = 1 << pages_per_frag; | 1415 | uint64_t frag_align = 1 << pages_per_frag; |
1470 | 1416 | ||
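The fragment size used for the PTE fragment field is no longer derived from a per-ASIC macro but read from vm_manager.fragment_size, which the new module parameter can override. A small standalone sketch of how the fragment flags and the required alignment follow from that value (the field layout is an assumption for illustration; see the AMDGPU_PTE_FRAG definition for the real encoding):

#include <stdint.h>

#define PTE_FRAG(x) (((uint64_t)(x) & 0x1f) << 7)   /* assumed field layout */

/* fragment_size is log2(pages per fragment), e.g. 4 -> 16 pages. */
static void frag_params(unsigned fragment_size,
			uint64_t *frag_flags, uint64_t *frag_align)
{
	*frag_flags = PTE_FRAG(fragment_size);
	*frag_align = 1ULL << fragment_size;   /* start/end must be aligned to this */
}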
@@ -1778,7 +1724,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1778 | struct amdgpu_bo_va *bo_va, | 1724 | struct amdgpu_bo_va *bo_va, |
1779 | bool clear) | 1725 | bool clear) |
1780 | { | 1726 | { |
1781 | struct amdgpu_vm *vm = bo_va->vm; | 1727 | struct amdgpu_bo *bo = bo_va->base.bo; |
1728 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
1782 | struct amdgpu_bo_va_mapping *mapping; | 1729 | struct amdgpu_bo_va_mapping *mapping; |
1783 | dma_addr_t *pages_addr = NULL; | 1730 | dma_addr_t *pages_addr = NULL; |
1784 | uint64_t gtt_flags, flags; | 1731 | uint64_t gtt_flags, flags; |
@@ -1787,27 +1734,27 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1787 | struct dma_fence *exclusive; | 1734 | struct dma_fence *exclusive; |
1788 | int r; | 1735 | int r; |
1789 | 1736 | ||
1790 | if (clear || !bo_va->bo) { | 1737 | if (clear || !bo_va->base.bo) { |
1791 | mem = NULL; | 1738 | mem = NULL; |
1792 | nodes = NULL; | 1739 | nodes = NULL; |
1793 | exclusive = NULL; | 1740 | exclusive = NULL; |
1794 | } else { | 1741 | } else { |
1795 | struct ttm_dma_tt *ttm; | 1742 | struct ttm_dma_tt *ttm; |
1796 | 1743 | ||
1797 | mem = &bo_va->bo->tbo.mem; | 1744 | mem = &bo_va->base.bo->tbo.mem; |
1798 | nodes = mem->mm_node; | 1745 | nodes = mem->mm_node; |
1799 | if (mem->mem_type == TTM_PL_TT) { | 1746 | if (mem->mem_type == TTM_PL_TT) { |
1800 | ttm = container_of(bo_va->bo->tbo.ttm, struct | 1747 | ttm = container_of(bo_va->base.bo->tbo.ttm, |
1801 | ttm_dma_tt, ttm); | 1748 | struct ttm_dma_tt, ttm); |
1802 | pages_addr = ttm->dma_address; | 1749 | pages_addr = ttm->dma_address; |
1803 | } | 1750 | } |
1804 | exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); | 1751 | exclusive = reservation_object_get_excl(bo->tbo.resv); |
1805 | } | 1752 | } |
1806 | 1753 | ||
1807 | if (bo_va->bo) { | 1754 | if (bo) { |
1808 | flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); | 1755 | flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); |
1809 | gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && | 1756 | gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) && |
1810 | adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? | 1757 | adev == amdgpu_ttm_adev(bo->tbo.bdev)) ? |
1811 | flags : 0; | 1758 | flags : 0; |
1812 | } else { | 1759 | } else { |
1813 | flags = 0x0; | 1760 | flags = 0x0; |
@@ -1815,7 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1815 | } | 1762 | } |
1816 | 1763 | ||
1817 | spin_lock(&vm->status_lock); | 1764 | spin_lock(&vm->status_lock); |
1818 | if (!list_empty(&bo_va->vm_status)) | 1765 | if (!list_empty(&bo_va->base.vm_status)) |
1819 | list_splice_init(&bo_va->valids, &bo_va->invalids); | 1766 | list_splice_init(&bo_va->valids, &bo_va->invalids); |
1820 | spin_unlock(&vm->status_lock); | 1767 | spin_unlock(&vm->status_lock); |
1821 | 1768 | ||
@@ -1838,9 +1785,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1838 | 1785 | ||
1839 | spin_lock(&vm->status_lock); | 1786 | spin_lock(&vm->status_lock); |
1840 | list_splice_init(&bo_va->invalids, &bo_va->valids); | 1787 | list_splice_init(&bo_va->invalids, &bo_va->valids); |
1841 | list_del_init(&bo_va->vm_status); | 1788 | list_del_init(&bo_va->base.vm_status); |
1842 | if (clear) | 1789 | if (clear) |
1843 | list_add(&bo_va->vm_status, &vm->cleared); | 1790 | list_add(&bo_va->base.vm_status, &vm->cleared); |
1844 | spin_unlock(&vm->status_lock); | 1791 | spin_unlock(&vm->status_lock); |
1845 | 1792 | ||
1846 | if (vm->use_cpu_for_update) { | 1793 | if (vm->use_cpu_for_update) { |
@@ -2034,26 +1981,26 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
2034 | } | 1981 | } |
2035 | 1982 | ||
2036 | /** | 1983 | /** |
2037 | * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT | 1984 | * amdgpu_vm_clear_moved - clear moved BOs in the PT |
2038 | * | 1985 | * |
2039 | * @adev: amdgpu_device pointer | 1986 | * @adev: amdgpu_device pointer |
2040 | * @vm: requested vm | 1987 | * @vm: requested vm |
2041 | * | 1988 | * |
2042 | * Make sure all invalidated BOs are cleared in the PT. | 1989 | * Make sure all moved BOs are cleared in the PT. |
2043 | * Returns 0 for success. | 1990 | * Returns 0 for success. |
2044 | * | 1991 | * |
2045 | * PTs have to be reserved and mutex must be locked! | 1992 | * PTs have to be reserved and mutex must be locked! |
2046 | */ | 1993 | */ |
2047 | int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, | 1994 | int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
2048 | struct amdgpu_vm *vm, struct amdgpu_sync *sync) | 1995 | struct amdgpu_sync *sync) |
2049 | { | 1996 | { |
2050 | struct amdgpu_bo_va *bo_va = NULL; | 1997 | struct amdgpu_bo_va *bo_va = NULL; |
2051 | int r = 0; | 1998 | int r = 0; |
2052 | 1999 | ||
2053 | spin_lock(&vm->status_lock); | 2000 | spin_lock(&vm->status_lock); |
2054 | while (!list_empty(&vm->invalidated)) { | 2001 | while (!list_empty(&vm->moved)) { |
2055 | bo_va = list_first_entry(&vm->invalidated, | 2002 | bo_va = list_first_entry(&vm->moved, |
2056 | struct amdgpu_bo_va, vm_status); | 2003 | struct amdgpu_bo_va, base.vm_status); |
2057 | spin_unlock(&vm->status_lock); | 2004 | spin_unlock(&vm->status_lock); |
2058 | 2005 | ||
2059 | r = amdgpu_vm_bo_update(adev, bo_va, true); | 2006 | r = amdgpu_vm_bo_update(adev, bo_va, true); |
@@ -2093,16 +2040,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | |||
2093 | if (bo_va == NULL) { | 2040 | if (bo_va == NULL) { |
2094 | return NULL; | 2041 | return NULL; |
2095 | } | 2042 | } |
2096 | bo_va->vm = vm; | 2043 | bo_va->base.vm = vm; |
2097 | bo_va->bo = bo; | 2044 | bo_va->base.bo = bo; |
2045 | INIT_LIST_HEAD(&bo_va->base.bo_list); | ||
2046 | INIT_LIST_HEAD(&bo_va->base.vm_status); | ||
2047 | |||
2098 | bo_va->ref_count = 1; | 2048 | bo_va->ref_count = 1; |
2099 | INIT_LIST_HEAD(&bo_va->bo_list); | ||
2100 | INIT_LIST_HEAD(&bo_va->valids); | 2049 | INIT_LIST_HEAD(&bo_va->valids); |
2101 | INIT_LIST_HEAD(&bo_va->invalids); | 2050 | INIT_LIST_HEAD(&bo_va->invalids); |
2102 | INIT_LIST_HEAD(&bo_va->vm_status); | ||
2103 | 2051 | ||
2104 | if (bo) | 2052 | if (bo) |
2105 | list_add_tail(&bo_va->bo_list, &bo->va); | 2053 | list_add_tail(&bo_va->base.bo_list, &bo->va); |
2106 | 2054 | ||
2107 | return bo_va; | 2055 | return bo_va; |
2108 | } | 2056 | } |
@@ -2127,7 +2075,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2127 | uint64_t size, uint64_t flags) | 2075 | uint64_t size, uint64_t flags) |
2128 | { | 2076 | { |
2129 | struct amdgpu_bo_va_mapping *mapping, *tmp; | 2077 | struct amdgpu_bo_va_mapping *mapping, *tmp; |
2130 | struct amdgpu_vm *vm = bo_va->vm; | 2078 | struct amdgpu_bo *bo = bo_va->base.bo; |
2079 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
2131 | uint64_t eaddr; | 2080 | uint64_t eaddr; |
2132 | 2081 | ||
2133 | /* validate the parameters */ | 2082 | /* validate the parameters */ |
@@ -2138,7 +2087,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2138 | /* make sure object fit at this offset */ | 2087 | /* make sure object fit at this offset */ |
2139 | eaddr = saddr + size - 1; | 2088 | eaddr = saddr + size - 1; |
2140 | if (saddr >= eaddr || | 2089 | if (saddr >= eaddr || |
2141 | (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) | 2090 | (bo && offset + size > amdgpu_bo_size(bo))) |
2142 | return -EINVAL; | 2091 | return -EINVAL; |
2143 | 2092 | ||
2144 | saddr /= AMDGPU_GPU_PAGE_SIZE; | 2093 | saddr /= AMDGPU_GPU_PAGE_SIZE; |
@@ -2148,7 +2097,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2148 | if (tmp) { | 2097 | if (tmp) { |
2149 | /* bo and tmp overlap, invalid addr */ | 2098 | /* bo and tmp overlap, invalid addr */ |
2150 | dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " | 2099 | dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " |
2151 | "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, | 2100 | "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, |
2152 | tmp->start, tmp->last + 1); | 2101 | tmp->start, tmp->last + 1); |
2153 | return -EINVAL; | 2102 | return -EINVAL; |
2154 | } | 2103 | } |
@@ -2193,7 +2142,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2193 | uint64_t size, uint64_t flags) | 2142 | uint64_t size, uint64_t flags) |
2194 | { | 2143 | { |
2195 | struct amdgpu_bo_va_mapping *mapping; | 2144 | struct amdgpu_bo_va_mapping *mapping; |
2196 | struct amdgpu_vm *vm = bo_va->vm; | 2145 | struct amdgpu_bo *bo = bo_va->base.bo; |
2146 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
2197 | uint64_t eaddr; | 2147 | uint64_t eaddr; |
2198 | int r; | 2148 | int r; |
2199 | 2149 | ||
@@ -2205,7 +2155,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2205 | /* make sure object fit at this offset */ | 2155 | /* make sure object fit at this offset */ |
2206 | eaddr = saddr + size - 1; | 2156 | eaddr = saddr + size - 1; |
2207 | if (saddr >= eaddr || | 2157 | if (saddr >= eaddr || |
2208 | (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) | 2158 | (bo && offset + size > amdgpu_bo_size(bo))) |
2209 | return -EINVAL; | 2159 | return -EINVAL; |
2210 | 2160 | ||
2211 | /* Allocate all the needed memory */ | 2161 | /* Allocate all the needed memory */ |
@@ -2213,7 +2163,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2213 | if (!mapping) | 2163 | if (!mapping) |
2214 | return -ENOMEM; | 2164 | return -ENOMEM; |
2215 | 2165 | ||
2216 | r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); | 2166 | r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); |
2217 | if (r) { | 2167 | if (r) { |
2218 | kfree(mapping); | 2168 | kfree(mapping); |
2219 | return r; | 2169 | return r; |
@@ -2253,7 +2203,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
2253 | uint64_t saddr) | 2203 | uint64_t saddr) |
2254 | { | 2204 | { |
2255 | struct amdgpu_bo_va_mapping *mapping; | 2205 | struct amdgpu_bo_va_mapping *mapping; |
2256 | struct amdgpu_vm *vm = bo_va->vm; | 2206 | struct amdgpu_vm *vm = bo_va->base.vm; |
2257 | bool valid = true; | 2207 | bool valid = true; |
2258 | 2208 | ||
2259 | saddr /= AMDGPU_GPU_PAGE_SIZE; | 2209 | saddr /= AMDGPU_GPU_PAGE_SIZE; |
@@ -2401,12 +2351,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
2401 | struct amdgpu_bo_va *bo_va) | 2351 | struct amdgpu_bo_va *bo_va) |
2402 | { | 2352 | { |
2403 | struct amdgpu_bo_va_mapping *mapping, *next; | 2353 | struct amdgpu_bo_va_mapping *mapping, *next; |
2404 | struct amdgpu_vm *vm = bo_va->vm; | 2354 | struct amdgpu_vm *vm = bo_va->base.vm; |
2405 | 2355 | ||
2406 | list_del(&bo_va->bo_list); | 2356 | list_del(&bo_va->base.bo_list); |
2407 | 2357 | ||
2408 | spin_lock(&vm->status_lock); | 2358 | spin_lock(&vm->status_lock); |
2409 | list_del(&bo_va->vm_status); | 2359 | list_del(&bo_va->base.vm_status); |
2410 | spin_unlock(&vm->status_lock); | 2360 | spin_unlock(&vm->status_lock); |
2411 | 2361 | ||
2412 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { | 2362 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { |
@@ -2438,13 +2388,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
2438 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | 2388 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, |
2439 | struct amdgpu_bo *bo) | 2389 | struct amdgpu_bo *bo) |
2440 | { | 2390 | { |
2441 | struct amdgpu_bo_va *bo_va; | 2391 | struct amdgpu_vm_bo_base *bo_base; |
2442 | 2392 | ||
2443 | list_for_each_entry(bo_va, &bo->va, bo_list) { | 2393 | list_for_each_entry(bo_base, &bo->va, bo_list) { |
2444 | spin_lock(&bo_va->vm->status_lock); | 2394 | spin_lock(&bo_base->vm->status_lock); |
2445 | if (list_empty(&bo_va->vm_status)) | 2395 | if (list_empty(&bo_base->vm_status)) |
2446 | list_add(&bo_va->vm_status, &bo_va->vm->invalidated); | 2396 | list_add(&bo_base->vm_status, |
2447 | spin_unlock(&bo_va->vm->status_lock); | 2397 | &bo_base->vm->moved); |
2398 | spin_unlock(&bo_base->vm->status_lock); | ||
2448 | } | 2399 | } |
2449 | } | 2400 | } |
2450 | 2401 | ||
@@ -2462,12 +2413,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) | |||
2462 | } | 2413 | } |
2463 | 2414 | ||
2464 | /** | 2415 | /** |
2465 | * amdgpu_vm_adjust_size - adjust vm size and block size | 2416 | * amdgpu_vm_set_fragment_size - adjust fragment size in PTE |
2417 | * | ||
2418 | * @adev: amdgpu_device pointer | ||
2419 | * @fragment_size_default: the default fragment size if it's set auto | ||
2420 | */ | ||
2421 | void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default) | ||
2422 | { | ||
2423 | if (amdgpu_vm_fragment_size == -1) | ||
2424 | adev->vm_manager.fragment_size = fragment_size_default; | ||
2425 | else | ||
2426 | adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; | ||
2427 | } | ||
2428 | |||
2429 | /** | ||
2430 | * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size | ||
2466 | * | 2431 | * |
2467 | * @adev: amdgpu_device pointer | 2432 | * @adev: amdgpu_device pointer |
2468 | * @vm_size: the default vm size if it's set auto | 2433 | * @vm_size: the default vm size if it's set auto |
2469 | */ | 2434 | */ |
2470 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) | 2435 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default) |
2471 | { | 2436 | { |
2472 | /* adjust vm size firstly */ | 2437 | /* adjust vm size firstly */ |
2473 | if (amdgpu_vm_size == -1) | 2438 | if (amdgpu_vm_size == -1) |
@@ -2482,8 +2447,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) | |||
2482 | else | 2447 | else |
2483 | adev->vm_manager.block_size = amdgpu_vm_block_size; | 2448 | adev->vm_manager.block_size = amdgpu_vm_block_size; |
2484 | 2449 | ||
2485 | DRM_INFO("vm size is %llu GB, block size is %u-bit\n", | 2450 | amdgpu_vm_set_fragment_size(adev, fragment_size_default); |
2486 | adev->vm_manager.vm_size, adev->vm_manager.block_size); | 2451 | |
2452 | DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", | ||
2453 | adev->vm_manager.vm_size, adev->vm_manager.block_size, | ||
2454 | adev->vm_manager.fragment_size); | ||
2487 | } | 2455 | } |
2488 | 2456 | ||
2489 | /** | 2457 | /** |
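amdgpu_vm_set_fragment_size follows the same pattern as the existing vm_size/block_size handling: -1, the default for the new amdgpu_vm_fragment_size parameter, means "use the per-ASIC default passed in by the GMC code", while any other value overrides it. A standalone sketch of that selection:

#include <stdint.h>

static int vm_fragment_size = -1;    /* stand-in for the module parameter */

static uint32_t pick_fragment_size(uint32_t asic_default)
{
	if (vm_fragment_size == -1)          /* "auto": take the ASIC default */
		return asic_default;
	return (uint32_t)vm_fragment_size;   /* explicit override from the user */
}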
@@ -2512,7 +2480,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2512 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | 2480 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) |
2513 | vm->reserved_vmid[i] = NULL; | 2481 | vm->reserved_vmid[i] = NULL; |
2514 | spin_lock_init(&vm->status_lock); | 2482 | spin_lock_init(&vm->status_lock); |
2515 | INIT_LIST_HEAD(&vm->invalidated); | 2483 | INIT_LIST_HEAD(&vm->moved); |
2516 | INIT_LIST_HEAD(&vm->cleared); | 2484 | INIT_LIST_HEAD(&vm->cleared); |
2517 | INIT_LIST_HEAD(&vm->freed); | 2485 | INIT_LIST_HEAD(&vm->freed); |
2518 | 2486 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 217ecba8f4cc..ba6691b58ee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |||
@@ -50,11 +50,6 @@ struct amdgpu_bo_list_entry; | |||
50 | /* PTBs (Page Table Blocks) need to be aligned to 32K */ | 50 | /* PTBs (Page Table Blocks) need to be aligned to 32K */ |
51 | #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 | 51 | #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 |
52 | 52 | ||
53 | /* LOG2 number of continuous pages for the fragment field */ | ||
54 | #define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \ | ||
55 | ((adev)->asic_type < CHIP_VEGA10 ? 4 : \ | ||
56 | (adev)->vm_manager.block_size) | ||
57 | |||
58 | #define AMDGPU_PTE_VALID (1ULL << 0) | 53 | #define AMDGPU_PTE_VALID (1ULL << 0) |
59 | #define AMDGPU_PTE_SYSTEM (1ULL << 1) | 54 | #define AMDGPU_PTE_SYSTEM (1ULL << 1) |
60 | #define AMDGPU_PTE_SNOOPED (1ULL << 2) | 55 | #define AMDGPU_PTE_SNOOPED (1ULL << 2) |
@@ -99,11 +94,22 @@ struct amdgpu_bo_list_entry; | |||
99 | #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) | 94 | #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) |
100 | #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) | 95 | #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) |
101 | 96 | ||
97 | /* base structure for tracking BO usage in a VM */ | ||
98 | struct amdgpu_vm_bo_base { | ||
99 | /* constant after initialization */ | ||
100 | struct amdgpu_vm *vm; | ||
101 | struct amdgpu_bo *bo; | ||
102 | |||
103 | /* protected by bo being reserved */ | ||
104 | struct list_head bo_list; | ||
105 | |||
106 | /* protected by spinlock */ | ||
107 | struct list_head vm_status; | ||
108 | }; | ||
102 | 109 | ||
103 | struct amdgpu_vm_pt { | 110 | struct amdgpu_vm_pt { |
104 | struct amdgpu_bo *bo; | 111 | struct amdgpu_bo *bo; |
105 | uint64_t addr; | 112 | uint64_t addr; |
106 | bool huge_page; | ||
107 | 113 | ||
108 | /* array of page tables, one for each directory entry */ | 114 | /* array of page tables, one for each directory entry */ |
109 | struct amdgpu_vm_pt *entries; | 115 | struct amdgpu_vm_pt *entries; |
@@ -118,7 +124,7 @@ struct amdgpu_vm { | |||
118 | spinlock_t status_lock; | 124 | spinlock_t status_lock; |
119 | 125 | ||
120 | /* BOs moved, but not yet updated in the PT */ | 126 | /* BOs moved, but not yet updated in the PT */ |
121 | struct list_head invalidated; | 127 | struct list_head moved; |
122 | 128 | ||
123 | /* BOs cleared in the PT because of a move */ | 129 | /* BOs cleared in the PT because of a move */ |
124 | struct list_head cleared; | 130 | struct list_head cleared; |
@@ -141,8 +147,6 @@ struct amdgpu_vm { | |||
141 | u64 client_id; | 147 | u64 client_id; |
142 | /* dedicated to vm */ | 148 | /* dedicated to vm */ |
143 | struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; | 149 | struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; |
144 | /* each VM will map on CSA */ | ||
145 | struct amdgpu_bo_va *csa_bo_va; | ||
146 | 150 | ||
147 | /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ | 151 | /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ |
148 | bool use_cpu_for_update; | 152 | bool use_cpu_for_update; |
@@ -191,6 +195,7 @@ struct amdgpu_vm_manager { | |||
191 | uint32_t num_level; | 195 | uint32_t num_level; |
192 | uint64_t vm_size; | 196 | uint64_t vm_size; |
193 | uint32_t block_size; | 197 | uint32_t block_size; |
198 | uint32_t fragment_size; | ||
194 | /* vram base address for page table entry */ | 199 | /* vram base address for page table entry */ |
195 | u64 vram_base_offset; | 200 | u64 vram_base_offset; |
196 | /* vm pte handling */ | 201 | /* vm pte handling */ |
@@ -223,8 +228,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
223 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 228 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
224 | int (*callback)(void *p, struct amdgpu_bo *bo), | 229 | int (*callback)(void *p, struct amdgpu_bo *bo), |
225 | void *param); | 230 | void *param); |
226 | void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, | ||
227 | struct amdgpu_vm *vm); | ||
228 | int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, | 231 | int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, |
229 | struct amdgpu_vm *vm, | 232 | struct amdgpu_vm *vm, |
230 | uint64_t saddr, uint64_t size); | 233 | uint64_t saddr, uint64_t size); |
@@ -240,8 +243,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, | |||
240 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | 243 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, |
241 | struct amdgpu_vm *vm, | 244 | struct amdgpu_vm *vm, |
242 | struct dma_fence **fence); | 245 | struct dma_fence **fence); |
243 | int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 246 | int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
244 | struct amdgpu_sync *sync); | 247 | struct amdgpu_sync *sync); |
245 | int amdgpu_vm_bo_update(struct amdgpu_device *adev, | 248 | int amdgpu_vm_bo_update(struct amdgpu_device *adev, |
246 | struct amdgpu_bo_va *bo_va, | 249 | struct amdgpu_bo_va *bo_va, |
247 | bool clear); | 250 | bool clear); |
@@ -268,7 +271,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | |||
268 | uint64_t saddr, uint64_t size); | 271 | uint64_t saddr, uint64_t size); |
269 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | 272 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, |
270 | struct amdgpu_bo_va *bo_va); | 273 | struct amdgpu_bo_va *bo_va); |
271 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); | 274 | void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, |
275 | uint32_t fragment_size_default); | ||
276 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, | ||
277 | uint32_t fragment_size_default); | ||
272 | int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); | 278 | int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); |
273 | bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, | 279 | bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, |
274 | struct amdgpu_job *job); | 280 | struct amdgpu_job *job); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a2c59a08b2bd..26e900627971 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | |||
@@ -28,6 +28,8 @@ | |||
28 | struct amdgpu_vram_mgr { | 28 | struct amdgpu_vram_mgr { |
29 | struct drm_mm mm; | 29 | struct drm_mm mm; |
30 | spinlock_t lock; | 30 | spinlock_t lock; |
31 | atomic64_t usage; | ||
32 | atomic64_t vis_usage; | ||
31 | }; | 33 | }; |
32 | 34 | ||
33 | /** | 35 | /** |
@@ -79,6 +81,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) | |||
79 | } | 81 | } |
80 | 82 | ||
81 | /** | 83 | /** |
84 | * amdgpu_vram_mgr_vis_size - Calculate visible node size | ||
85 | * | ||
86 | * @adev: amdgpu device structure | ||
87 | * @node: MM node structure | ||
88 | * | ||
89 | * Calculate how many bytes of the MM node are inside visible VRAM | ||
90 | */ | ||
91 | static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, | ||
92 | struct drm_mm_node *node) | ||
93 | { | ||
94 | uint64_t start = node->start << PAGE_SHIFT; | ||
95 | uint64_t end = (node->size + node->start) << PAGE_SHIFT; | ||
96 | |||
97 | if (start >= adev->mc.visible_vram_size) | ||
98 | return 0; | ||
99 | |||
100 | return (end > adev->mc.visible_vram_size ? | ||
101 | adev->mc.visible_vram_size : end) - start; | ||
102 | } | ||
103 | |||
104 | /** | ||
82 | * amdgpu_vram_mgr_new - allocate new ranges | 105 | * amdgpu_vram_mgr_new - allocate new ranges |
83 | * | 106 | * |
84 | * @man: TTM memory type manager | 107 | * @man: TTM memory type manager |
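amdgpu_vram_mgr_vis_size clips each drm_mm node against the CPU-visible window so the manager can keep a running vis_usage counter. The clamp itself is simple enough to show standalone:

#include <stdint.h>

/* Bytes of [start, start + size) that fall below visible_size. */
static uint64_t vis_bytes(uint64_t start, uint64_t size, uint64_t visible_size)
{
	uint64_t end = start + size;

	if (start >= visible_size)
		return 0;
	return (end > visible_size ? visible_size : end) - start;
}

For example, a 16 MB node that starts 8 MB below the visible-VRAM boundary contributes only 8 MB to vis_usage.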
@@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, | |||
93 | const struct ttm_place *place, | 116 | const struct ttm_place *place, |
94 | struct ttm_mem_reg *mem) | 117 | struct ttm_mem_reg *mem) |
95 | { | 118 | { |
119 | struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); | ||
96 | struct amdgpu_vram_mgr *mgr = man->priv; | 120 | struct amdgpu_vram_mgr *mgr = man->priv; |
97 | struct drm_mm *mm = &mgr->mm; | 121 | struct drm_mm *mm = &mgr->mm; |
98 | struct drm_mm_node *nodes; | 122 | struct drm_mm_node *nodes; |
99 | enum drm_mm_insert_mode mode; | 123 | enum drm_mm_insert_mode mode; |
100 | unsigned long lpfn, num_nodes, pages_per_node, pages_left; | 124 | unsigned long lpfn, num_nodes, pages_per_node, pages_left; |
125 | uint64_t usage = 0, vis_usage = 0; | ||
101 | unsigned i; | 126 | unsigned i; |
102 | int r; | 127 | int r; |
103 | 128 | ||
@@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, | |||
142 | if (unlikely(r)) | 167 | if (unlikely(r)) |
143 | goto error; | 168 | goto error; |
144 | 169 | ||
170 | usage += nodes[i].size << PAGE_SHIFT; | ||
171 | vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); | ||
172 | |||
145 | /* Calculate a virtual BO start address to easily check if | 173 | /* Calculate a virtual BO start address to easily check if |
146 | * everything is CPU accessible. | 174 | * everything is CPU accessible. |
147 | */ | 175 | */ |
@@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, | |||
155 | } | 183 | } |
156 | spin_unlock(&mgr->lock); | 184 | spin_unlock(&mgr->lock); |
157 | 185 | ||
186 | atomic64_add(usage, &mgr->usage); | ||
187 | atomic64_add(vis_usage, &mgr->vis_usage); | ||
188 | |||
158 | mem->mm_node = nodes; | 189 | mem->mm_node = nodes; |
159 | 190 | ||
160 | return 0; | 191 | return 0; |
@@ -181,8 +212,10 @@ error: | |||
181 | static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, | 212 | static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, |
182 | struct ttm_mem_reg *mem) | 213 | struct ttm_mem_reg *mem) |
183 | { | 214 | { |
215 | struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); | ||
184 | struct amdgpu_vram_mgr *mgr = man->priv; | 216 | struct amdgpu_vram_mgr *mgr = man->priv; |
185 | struct drm_mm_node *nodes = mem->mm_node; | 217 | struct drm_mm_node *nodes = mem->mm_node; |
218 | uint64_t usage = 0, vis_usage = 0; | ||
186 | unsigned pages = mem->num_pages; | 219 | unsigned pages = mem->num_pages; |
187 | 220 | ||
188 | if (!mem->mm_node) | 221 | if (!mem->mm_node) |
@@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, | |||
192 | while (pages) { | 225 | while (pages) { |
193 | pages -= nodes->size; | 226 | pages -= nodes->size; |
194 | drm_mm_remove_node(nodes); | 227 | drm_mm_remove_node(nodes); |
228 | usage += nodes->size << PAGE_SHIFT; | ||
229 | vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); | ||
195 | ++nodes; | 230 | ++nodes; |
196 | } | 231 | } |
197 | spin_unlock(&mgr->lock); | 232 | spin_unlock(&mgr->lock); |
198 | 233 | ||
234 | atomic64_sub(usage, &mgr->usage); | ||
235 | atomic64_sub(vis_usage, &mgr->vis_usage); | ||
236 | |||
199 | kfree(mem->mm_node); | 237 | kfree(mem->mm_node); |
200 | mem->mm_node = NULL; | 238 | mem->mm_node = NULL; |
201 | } | 239 | } |
202 | 240 | ||
203 | /** | 241 | /** |
242 | * amdgpu_vram_mgr_usage - how many bytes are used in this domain | ||
243 | * | ||
244 | * @man: TTM memory type manager | ||
245 | * | ||
246 | * Returns how many bytes are used in this domain. | ||
247 | */ | ||
248 | uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man) | ||
249 | { | ||
250 | struct amdgpu_vram_mgr *mgr = man->priv; | ||
251 | |||
252 | return atomic64_read(&mgr->usage); | ||
253 | } | ||
254 | |||
255 | /** | ||
256 | * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part | ||
257 | * | ||
258 | * @man: TTM memory type manager | ||
259 | * | ||
260 | * Returns how many bytes are used in the visible part of VRAM | ||
261 | */ | ||
262 | uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man) | ||
263 | { | ||
264 | struct amdgpu_vram_mgr *mgr = man->priv; | ||
265 | |||
266 | return atomic64_read(&mgr->vis_usage); | ||
267 | } | ||
268 | |||
269 | /** | ||
204 | * amdgpu_vram_mgr_debug - dump VRAM table | 270 | * amdgpu_vram_mgr_debug - dump VRAM table |
205 | * | 271 | * |
206 | * @man: TTM memory type manager | 272 | * @man: TTM memory type manager |
207 | * @prefix: text prefix | 273 | * @printer: DRM printer to use |
208 | * | 274 | * |
209 | * Dump the table content using printk. | 275 | * Dump the table content using printk. |
210 | */ | 276 | */ |
211 | static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, | 277 | static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, |
212 | const char *prefix) | 278 | struct drm_printer *printer) |
213 | { | 279 | { |
214 | struct amdgpu_vram_mgr *mgr = man->priv; | 280 | struct amdgpu_vram_mgr *mgr = man->priv; |
215 | struct drm_printer p = drm_debug_printer(prefix); | ||
216 | 281 | ||
217 | spin_lock(&mgr->lock); | 282 | spin_lock(&mgr->lock); |
218 | drm_mm_print(&mgr->mm, &p); | 283 | drm_mm_print(&mgr->mm, printer); |
219 | spin_unlock(&mgr->lock); | 284 | spin_unlock(&mgr->lock); |
285 | |||
286 | drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", | ||
287 | man->size, amdgpu_vram_mgr_usage(man) >> 20, | ||
288 | amdgpu_vram_mgr_vis_usage(man) >> 20); | ||
220 | } | 289 | } |
221 | 290 | ||
222 | const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { | 291 | const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { |
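With vram_usage/vram_vis_usage moved out of amdgpu_device, the VRAM manager keeps its own atomic counters: allocations accumulate the per-node sizes locally and publish them with a single add, frees subtract the same way, and the new amdgpu_vram_mgr_usage()/amdgpu_vram_mgr_vis_usage() helpers simply read the atomics. A minimal standalone model of that accounting, with user-space atomics standing in for atomic64_t:

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t usage;       /* stands in for mgr->usage */

static void alloc_nodes(const uint64_t *sizes, unsigned n)
{
	uint64_t total = 0;
	unsigned i;

	for (i = 0; i < n; i++)          /* accumulate outside the atomic ... */
		total += sizes[i];
	atomic_fetch_add(&usage, total); /* ... then publish with one add     */
}

static void free_nodes(const uint64_t *sizes, unsigned n)
{
	uint64_t total = 0;
	unsigned i;

	for (i = 0; i < n; i++)
		total += sizes[i];
	atomic_fetch_sub(&usage, total);
}

static uint64_t current_usage(void)
{
	return atomic_load(&usage);      /* what amdgpu_vram_mgr_usage() returns */
}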
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b39f81dda847..69182eeca264 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | |||
@@ -2157,7 +2157,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2157 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; | 2157 | struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; |
2158 | const struct cs_section_def *sect = NULL; | 2158 | const struct cs_section_def *sect = NULL; |
2159 | const struct cs_extent_def *ext = NULL; | 2159 | const struct cs_extent_def *ext = NULL; |
2160 | int r, i; | 2160 | int r, i, tmp; |
2161 | 2161 | ||
2162 | /* init the CP */ | 2162 | /* init the CP */ |
2163 | WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); | 2163 | WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); |
@@ -2165,7 +2165,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2165 | 2165 | ||
2166 | gfx_v9_0_cp_gfx_enable(adev, true); | 2166 | gfx_v9_0_cp_gfx_enable(adev, true); |
2167 | 2167 | ||
2168 | r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); | 2168 | r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); |
2169 | if (r) { | 2169 | if (r) { |
2170 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); | 2170 | DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); |
2171 | return r; | 2171 | return r; |
@@ -2203,6 +2203,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) | |||
2203 | amdgpu_ring_write(ring, 0x8000); | 2203 | amdgpu_ring_write(ring, 0x8000); |
2204 | amdgpu_ring_write(ring, 0x8000); | 2204 | amdgpu_ring_write(ring, 0x8000); |
2205 | 2205 | ||
2206 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); | ||
2207 | tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | | ||
2208 | (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); | ||
2209 | amdgpu_ring_write(ring, tmp); | ||
2210 | amdgpu_ring_write(ring, 0); | ||
2211 | |||
2206 | amdgpu_ring_commit(ring); | 2212 | amdgpu_ring_commit(ring); |
2207 | 2213 | ||
2208 | return 0; | 2214 | return 0; |
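The ring allocation grows by three dwords to make room for a SET_UCONFIG_REG packet that initializes VGT_INDEX_TYPE, using the INDEX_TYPE encoding added to soc15d.h further down. A hedged sketch of what those three dwords look like (the PACKET3 layout and bit positions here are assumptions for illustration, not the authoritative packet format):

#include <stdint.h>

#define PACKET3(op, n)   ((3u << 30) | (((n) & 0x3fffu) << 16) | \
			  (((op) & 0xffu) << 8))            /* assumed layout */
#define SET_UCONFIG_REG          0x79
#define UCONFIG_REG_START        0x0000c000
#define UCONFIG_REG_INDEX_TYPE   (2u << 28)                 /* the new define */

/* Emit the three extra dwords reserved by the "+ 3" in amdgpu_ring_alloc(). */
static unsigned emit_index_type(uint32_t *ring, unsigned wptr,
				uint32_t vgt_index_type_offset)
{
	ring[wptr++] = PACKET3(SET_UCONFIG_REG, 1);
	ring[wptr++] = UCONFIG_REG_INDEX_TYPE |
		       (vgt_index_type_offset - UCONFIG_REG_START);
	ring[wptr++] = 0;                   /* VGT_INDEX_TYPE reset to 0 */
	return wptr;
}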
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 408723ef157c..4f2788b61a08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | |||
@@ -124,7 +124,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) | |||
124 | 124 | ||
125 | static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) | 125 | static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) |
126 | { | 126 | { |
127 | uint32_t tmp; | 127 | uint32_t tmp, field; |
128 | 128 | ||
129 | /* Setup L2 cache */ | 129 | /* Setup L2 cache */ |
130 | tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL); | 130 | tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL); |
@@ -143,9 +143,10 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) | |||
143 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 143 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
144 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); | 144 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); |
145 | 145 | ||
146 | field = adev->vm_manager.fragment_size; | ||
146 | tmp = mmVM_L2_CNTL3_DEFAULT; | 147 | tmp = mmVM_L2_CNTL3_DEFAULT; |
147 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); | 148 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); |
148 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); | 149 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); |
149 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); | 150 | WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); |
150 | 151 | ||
151 | tmp = mmVM_L2_CNTL4_DEFAULT; | 152 | tmp = mmVM_L2_CNTL4_DEFAULT; |
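On the GFX hub (and the MM hub below) VM_L2_CNTL3 is now programmed from vm_manager.fragment_size instead of hard-coded constants: BANK_SELECT takes the fragment size and L2_CACHE_BIGK_FRAGMENT_SIZE drops from 9 to 6. A standalone sketch of the read-modify-write that REG_SET_FIELD performs (the masks and shifts here are placeholders, not the real register layout):

#include <stdint.h>

/* Generic field update, the same idea as REG_SET_FIELD(). */
static uint32_t set_field(uint32_t reg, uint32_t mask, unsigned shift,
			  uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

/* Example: pack two fields of a VM_L2_CNTL3-style register (placeholder fields). */
static uint32_t pack_l2_cntl3(uint32_t def, uint32_t fragment_size)
{
	uint32_t tmp = def;

	tmp = set_field(tmp, 0x0000003f, 0, fragment_size);   /* BANK_SELECT   */
	tmp = set_field(tmp, 0x03f00000, 20, 6);               /* BIGK_FRAGMENT */
	return tmp;
}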
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 93c45f26b7c8..12b0c4cd7a5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | |||
@@ -461,6 +461,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) | |||
461 | static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) | 461 | static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) |
462 | { | 462 | { |
463 | int r, i; | 463 | int r, i; |
464 | u32 field; | ||
464 | 465 | ||
465 | if (adev->gart.robj == NULL) { | 466 | if (adev->gart.robj == NULL) { |
466 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); | 467 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); |
@@ -488,10 +489,12 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) | |||
488 | WREG32(mmVM_L2_CNTL2, | 489 | WREG32(mmVM_L2_CNTL2, |
489 | VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | | 490 | VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | |
490 | VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); | 491 | VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); |
492 | |||
493 | field = adev->vm_manager.fragment_size; | ||
491 | WREG32(mmVM_L2_CNTL3, | 494 | WREG32(mmVM_L2_CNTL3, |
492 | VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | | 495 | VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | |
493 | (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) | | 496 | (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) | |
494 | (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); | 497 | (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); |
495 | /* setup context0 */ | 498 | /* setup context0 */ |
496 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); | 499 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); |
497 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); | 500 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); |
@@ -811,7 +814,7 @@ static int gmc_v6_0_sw_init(void *handle) | |||
811 | if (r) | 814 | if (r) |
812 | return r; | 815 | return r; |
813 | 816 | ||
814 | amdgpu_vm_adjust_size(adev, 64); | 817 | amdgpu_vm_adjust_size(adev, 64, 4); |
815 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; | 818 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; |
816 | 819 | ||
817 | adev->mc.mc_mask = 0xffffffffffULL; | 820 | adev->mc.mc_mask = 0xffffffffffULL; |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 4a9e84062874..e42c1ad3af5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | |||
@@ -562,7 +562,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) | |||
562 | static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) | 562 | static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) |
563 | { | 563 | { |
564 | int r, i; | 564 | int r, i; |
565 | u32 tmp; | 565 | u32 tmp, field; |
566 | 566 | ||
567 | if (adev->gart.robj == NULL) { | 567 | if (adev->gart.robj == NULL) { |
568 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); | 568 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); |
@@ -592,10 +592,12 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) | |||
592 | tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 592 | tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
593 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 593 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
594 | WREG32(mmVM_L2_CNTL2, tmp); | 594 | WREG32(mmVM_L2_CNTL2, tmp); |
595 | |||
596 | field = adev->vm_manager.fragment_size; | ||
595 | tmp = RREG32(mmVM_L2_CNTL3); | 597 | tmp = RREG32(mmVM_L2_CNTL3); |
596 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); | 598 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); |
597 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); | 599 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); |
598 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); | 600 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); |
599 | WREG32(mmVM_L2_CNTL3, tmp); | 601 | WREG32(mmVM_L2_CNTL3, tmp); |
600 | /* setup context0 */ | 602 | /* setup context0 */ |
601 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); | 603 | WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); |
@@ -948,7 +950,7 @@ static int gmc_v7_0_sw_init(void *handle) | |||
948 | * Currently set to 4GB ((1 << 20) 4k pages). | 950 | * Currently set to 4GB ((1 << 20) 4k pages). |
949 | * Max GPUVM size for cayman and SI is 40 bits. | 951 | * Max GPUVM size for cayman and SI is 40 bits. |
950 | */ | 952 | */ |
951 | amdgpu_vm_adjust_size(adev, 64); | 953 | amdgpu_vm_adjust_size(adev, 64, 4); |
952 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; | 954 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; |
953 | 955 | ||
954 | /* Set the internal MC address mask | 956 | /* Set the internal MC address mask |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 85c937b5e40b..7ca2dae8237a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | |||
@@ -762,7 +762,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) | |||
762 | static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) | 762 | static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) |
763 | { | 763 | { |
764 | int r, i; | 764 | int r, i; |
765 | u32 tmp; | 765 | u32 tmp, field; |
766 | 766 | ||
767 | if (adev->gart.robj == NULL) { | 767 | if (adev->gart.robj == NULL) { |
768 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); | 768 | dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); |
@@ -793,10 +793,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) | |||
793 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); | 793 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); |
794 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 794 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
795 | WREG32(mmVM_L2_CNTL2, tmp); | 795 | WREG32(mmVM_L2_CNTL2, tmp); |
796 | |||
797 | field = adev->vm_manager.fragment_size; | ||
796 | tmp = RREG32(mmVM_L2_CNTL3); | 798 | tmp = RREG32(mmVM_L2_CNTL3); |
797 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); | 799 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); |
798 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); | 800 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); |
799 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); | 801 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); |
800 | WREG32(mmVM_L2_CNTL3, tmp); | 802 | WREG32(mmVM_L2_CNTL3, tmp); |
801 | /* XXX: set to enable PTE/PDE in system memory */ | 803 | /* XXX: set to enable PTE/PDE in system memory */ |
802 | tmp = RREG32(mmVM_L2_CNTL4); | 804 | tmp = RREG32(mmVM_L2_CNTL4); |
@@ -1046,7 +1048,7 @@ static int gmc_v8_0_sw_init(void *handle) | |||
1046 | * Currently set to 4GB ((1 << 20) 4k pages). | 1048 | * Currently set to 4GB ((1 << 20) 4k pages). |
1047 | * Max GPUVM size for cayman and SI is 40 bits. | 1049 | * Max GPUVM size for cayman and SI is 40 bits. |
1048 | */ | 1050 | */ |
1049 | amdgpu_vm_adjust_size(adev, 64); | 1051 | amdgpu_vm_adjust_size(adev, 64, 4); |
1050 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; | 1052 | adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; |
1051 | 1053 | ||
1052 | /* Set the internal MC address mask | 1054 | /* Set the internal MC address mask |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c22899a08106..2769c2b3b56e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |||
@@ -541,9 +541,10 @@ static int gmc_v9_0_sw_init(void *handle) | |||
541 | adev->vm_manager.vm_size = 1U << 18; | 541 | adev->vm_manager.vm_size = 1U << 18; |
542 | adev->vm_manager.block_size = 9; | 542 | adev->vm_manager.block_size = 9; |
543 | adev->vm_manager.num_level = 3; | 543 | adev->vm_manager.num_level = 3; |
544 | amdgpu_vm_set_fragment_size(adev, 9); | ||
544 | } else { | 545 | } else { |
545 | /* vm_size is 64GB for legacy 2-level page support*/ | 546 | /* vm_size is 64GB for legacy 2-level page support */ |
546 | amdgpu_vm_adjust_size(adev, 64); | 547 | amdgpu_vm_adjust_size(adev, 64, 9); |
547 | adev->vm_manager.num_level = 1; | 548 | adev->vm_manager.num_level = 1; |
548 | } | 549 | } |
549 | break; | 550 | break; |
@@ -558,14 +559,16 @@ static int gmc_v9_0_sw_init(void *handle) | |||
558 | adev->vm_manager.vm_size = 1U << 18; | 559 | adev->vm_manager.vm_size = 1U << 18; |
559 | adev->vm_manager.block_size = 9; | 560 | adev->vm_manager.block_size = 9; |
560 | adev->vm_manager.num_level = 3; | 561 | adev->vm_manager.num_level = 3; |
562 | amdgpu_vm_set_fragment_size(adev, 9); | ||
561 | break; | 563 | break; |
562 | default: | 564 | default: |
563 | break; | 565 | break; |
564 | } | 566 | } |
565 | 567 | ||
566 | DRM_INFO("vm size is %llu GB, block size is %u-bit\n", | 568 | DRM_INFO("vm size is %llu GB, block size is %u-bit,fragment size is %u-bit\n", |
567 | adev->vm_manager.vm_size, | 569 | adev->vm_manager.vm_size, |
568 | adev->vm_manager.block_size); | 570 | adev->vm_manager.block_size, |
571 | adev->vm_manager.fragment_size); | ||
569 | 572 | ||
570 | /* This interrupt is VMC page fault.*/ | 573 | /* This interrupt is VMC page fault.*/ |
571 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, | 574 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, |
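The defaults passed to amdgpu_vm_adjust_size()/amdgpu_vm_set_fragment_size() give the fragment size in log2 GPU pages: 4 for the pre-Vega parts above and 9 for Vega. A quick standalone check of what that means in bytes with 4 KB GPU pages:

#include <stdio.h>

int main(void)
{
	unsigned long page = 4096;

	/* 4 -> 16 pages = 64 KB fragments (SI through VI). */
	printf("default 4: %lu KB\n", (page << 4) / 1024);
	/* 9 -> 512 pages = 2 MB fragments, matching the huge-page size (Vega). */
	printf("default 9: %lu MB\n", (page << 9) >> 20);
	return 0;
}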
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index ad8def3cc343..4395a4f12149 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | |||
@@ -138,7 +138,7 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) | |||
138 | 138 | ||
139 | static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) | 139 | static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) |
140 | { | 140 | { |
141 | uint32_t tmp; | 141 | uint32_t tmp, field; |
142 | 142 | ||
143 | /* Setup L2 cache */ | 143 | /* Setup L2 cache */ |
144 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); | 144 | tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); |
@@ -157,9 +157,10 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) | |||
157 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); | 157 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); |
158 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); | 158 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); |
159 | 159 | ||
160 | field = adev->vm_manager.fragment_size; | ||
160 | tmp = mmVM_L2_CNTL3_DEFAULT; | 161 | tmp = mmVM_L2_CNTL3_DEFAULT; |
161 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); | 162 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); |
162 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); | 163 | tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); |
163 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); | 164 | WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); |
164 | 165 | ||
165 | tmp = mmVM_L2_CNTL4_DEFAULT; | 166 | tmp = mmVM_L2_CNTL4_DEFAULT; |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index e79befd80eed..7f408f85fdb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h | |||
@@ -250,6 +250,7 @@ | |||
250 | #define PACKET3_SET_UCONFIG_REG 0x79 | 250 | #define PACKET3_SET_UCONFIG_REG 0x79 |
251 | #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 | 251 | #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 |
252 | #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 | 252 | #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 |
253 | #define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28) | ||
253 | #define PACKET3_SCRATCH_RAM_WRITE 0x7D | 254 | #define PACKET3_SCRATCH_RAM_WRITE 0x7D |
254 | #define PACKET3_SCRATCH_RAM_READ 0x7E | 255 | #define PACKET3_SCRATCH_RAM_READ 0x7E |
255 | #define PACKET3_LOAD_CONST_RAM 0x80 | 256 | #define PACKET3_LOAD_CONST_RAM 0x80 |
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 999c35a25498..b0ad7fcefcf5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c | |||
@@ -179,7 +179,8 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | static void | 181 | static void |
182 | nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) | 182 | nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, |
183 | struct drm_printer *printer) | ||
183 | { | 184 | { |
184 | } | 185 | } |
185 | 186 | ||
@@ -252,7 +253,8 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man, | |||
252 | } | 253 | } |
253 | 254 | ||
254 | static void | 255 | static void |
255 | nv04_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) | 256 | nv04_gart_manager_debug(struct ttm_mem_type_manager *man, |
257 | struct drm_printer *printer) | ||
256 | { | 258 | { |
257 | } | 259 | } |
258 | 260 | ||
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 50f60a587648..bf69bf9086bf 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c | |||
@@ -1030,19 +1030,17 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1030 | static int radeon_mm_dump_table(struct seq_file *m, void *data) | 1030 | static int radeon_mm_dump_table(struct seq_file *m, void *data) |
1031 | { | 1031 | { |
1032 | struct drm_info_node *node = (struct drm_info_node *)m->private; | 1032 | struct drm_info_node *node = (struct drm_info_node *)m->private; |
1033 | unsigned ttm_pl = *(int *)node->info_ent->data; | 1033 | unsigned ttm_pl = *(int*)node->info_ent->data; |
1034 | struct drm_device *dev = node->minor->dev; | 1034 | struct drm_device *dev = node->minor->dev; |
1035 | struct radeon_device *rdev = dev->dev_private; | 1035 | struct radeon_device *rdev = dev->dev_private; |
1036 | struct drm_mm *mm = (struct drm_mm *)rdev->mman.bdev.man[ttm_pl].priv; | 1036 | struct ttm_mem_type_manager *man = &rdev->mman.bdev.man[ttm_pl]; |
1037 | struct ttm_bo_global *glob = rdev->mman.bdev.glob; | ||
1038 | struct drm_printer p = drm_seq_file_printer(m); | 1037 | struct drm_printer p = drm_seq_file_printer(m); |
1039 | 1038 | ||
1040 | spin_lock(&glob->lru_lock); | 1039 | man->func->debug(man, &p); |
1041 | drm_mm_print(mm, &p); | ||
1042 | spin_unlock(&glob->lru_lock); | ||
1043 | return 0; | 1040 | return 0; |
1044 | } | 1041 | } |
1045 | 1042 | ||
1043 | |||
1046 | static int ttm_pl_vram = TTM_PL_VRAM; | 1044 | static int ttm_pl_vram = TTM_PL_VRAM; |
1047 | static int ttm_pl_tt = TTM_PL_TT; | 1045 | static int ttm_pl_tt = TTM_PL_TT; |
1048 | 1046 | ||
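radeon's debugfs dump no longer reaches into the manager's drm_mm under the global LRU lock; it asks the manager to print itself through a drm_printer, the same hook TTM now uses for its dmesg dumps. A hedged sketch of how a consumer can route the same dump to either destination (the helper name is illustrative):

#include <linux/seq_file.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_bo_driver.h>

/* Route a TTM manager dump to a debugfs seq_file or to the kernel log. */
static void dump_manager(struct ttm_mem_type_manager *man, struct seq_file *m)
{
	struct drm_printer p = m ? drm_seq_file_printer(m)
				 : drm_debug_printer("ttm");

	man->func->debug(man, &p);   /* the manager handles its own locking */
}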
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 22b57020790d..cba11f13d994 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c | |||
@@ -70,6 +70,7 @@ static inline int ttm_mem_type_from_place(const struct ttm_place *place, | |||
70 | static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) | 70 | static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) |
71 | { | 71 | { |
72 | struct ttm_mem_type_manager *man = &bdev->man[mem_type]; | 72 | struct ttm_mem_type_manager *man = &bdev->man[mem_type]; |
73 | struct drm_printer p = drm_debug_printer(TTM_PFX); | ||
73 | 74 | ||
74 | pr_err(" has_type: %d\n", man->has_type); | 75 | pr_err(" has_type: %d\n", man->has_type); |
75 | pr_err(" use_type: %d\n", man->use_type); | 76 | pr_err(" use_type: %d\n", man->use_type); |
@@ -79,7 +80,7 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) | |||
79 | pr_err(" available_caching: 0x%08X\n", man->available_caching); | 80 | pr_err(" available_caching: 0x%08X\n", man->available_caching); |
80 | pr_err(" default_caching: 0x%08X\n", man->default_caching); | 81 | pr_err(" default_caching: 0x%08X\n", man->default_caching); |
81 | if (mem_type != TTM_PL_SYSTEM) | 82 | if (mem_type != TTM_PL_SYSTEM) |
82 | (*man->func->debug)(man, TTM_PFX); | 83 | (*man->func->debug)(man, &p); |
83 | } | 84 | } |
84 | 85 | ||
85 | static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, | 86 | static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, |
@@ -394,14 +395,33 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) | |||
394 | ww_mutex_unlock (&bo->resv->lock); | 395 | ww_mutex_unlock (&bo->resv->lock); |
395 | } | 396 | } |
396 | 397 | ||
398 | static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) | ||
399 | { | ||
400 | int r; | ||
401 | |||
402 | if (bo->resv == &bo->ttm_resv) | ||
403 | return 0; | ||
404 | |||
405 | reservation_object_init(&bo->ttm_resv); | ||
406 | BUG_ON(!reservation_object_trylock(&bo->ttm_resv)); | ||
407 | |||
408 | r = reservation_object_copy_fences(&bo->ttm_resv, bo->resv); | ||
409 | if (r) { | ||
410 | reservation_object_unlock(&bo->ttm_resv); | ||
411 | reservation_object_fini(&bo->ttm_resv); | ||
412 | } | ||
413 | |||
414 | return r; | ||
415 | } | ||
416 | |||
397 | static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) | 417 | static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) |
398 | { | 418 | { |
399 | struct reservation_object_list *fobj; | 419 | struct reservation_object_list *fobj; |
400 | struct dma_fence *fence; | 420 | struct dma_fence *fence; |
401 | int i; | 421 | int i; |
402 | 422 | ||
403 | fobj = reservation_object_get_list(bo->resv); | 423 | fobj = reservation_object_get_list(&bo->ttm_resv); |
404 | fence = reservation_object_get_excl(bo->resv); | 424 | fence = reservation_object_get_excl(&bo->ttm_resv); |
405 | if (fence && !fence->ops->signaled) | 425 | if (fence && !fence->ops->signaled) |
406 | dma_fence_enable_sw_signaling(fence); | 426 | dma_fence_enable_sw_signaling(fence); |
407 | 427 | ||
@@ -430,8 +450,19 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) | |||
430 | ttm_bo_cleanup_memtype_use(bo); | 450 | ttm_bo_cleanup_memtype_use(bo); |
431 | 451 | ||
432 | return; | 452 | return; |
433 | } else | 453 | } |
434 | ttm_bo_flush_all_fences(bo); | 454 | |
455 | ret = ttm_bo_individualize_resv(bo); | ||
456 | if (ret) { | ||
457 | /* Last resort, if we fail to allocate memory for the | ||
458 | * fences block for the BO to become idle and free it. | ||
459 | */ | ||
460 | spin_unlock(&glob->lru_lock); | ||
461 | ttm_bo_wait(bo, true, true); | ||
462 | ttm_bo_cleanup_memtype_use(bo); | ||
463 | return; | ||
464 | } | ||
465 | ttm_bo_flush_all_fences(bo); | ||
435 | 466 | ||
436 | /* | 467 | /* |
437 | * Make NO_EVICT bos immediately available to | 468 | * Make NO_EVICT bos immediately available to |
@@ -443,6 +474,8 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) | |||
443 | ttm_bo_add_to_lru(bo); | 474 | ttm_bo_add_to_lru(bo); |
444 | } | 475 | } |
445 | 476 | ||
477 | if (bo->resv != &bo->ttm_resv) | ||
478 | reservation_object_unlock(&bo->ttm_resv); | ||
446 | __ttm_bo_unreserve(bo); | 479 | __ttm_bo_unreserve(bo); |
447 | } | 480 | } |
448 | 481 | ||
@@ -471,17 +504,25 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, | |||
471 | bool no_wait_gpu) | 504 | bool no_wait_gpu) |
472 | { | 505 | { |
473 | struct ttm_bo_global *glob = bo->glob; | 506 | struct ttm_bo_global *glob = bo->glob; |
507 | struct reservation_object *resv; | ||
474 | int ret; | 508 | int ret; |
475 | 509 | ||
476 | ret = ttm_bo_wait(bo, false, true); | 510 | if (unlikely(list_empty(&bo->ddestroy))) |
511 | resv = bo->resv; | ||
512 | else | ||
513 | resv = &bo->ttm_resv; | ||
514 | |||
515 | if (reservation_object_test_signaled_rcu(resv, true)) | ||
516 | ret = 0; | ||
517 | else | ||
518 | ret = -EBUSY; | ||
477 | 519 | ||
478 | if (ret && !no_wait_gpu) { | 520 | if (ret && !no_wait_gpu) { |
479 | long lret; | 521 | long lret; |
480 | ww_mutex_unlock(&bo->resv->lock); | 522 | ww_mutex_unlock(&bo->resv->lock); |
481 | spin_unlock(&glob->lru_lock); | 523 | spin_unlock(&glob->lru_lock); |
482 | 524 | ||
483 | lret = reservation_object_wait_timeout_rcu(bo->resv, | 525 | lret = reservation_object_wait_timeout_rcu(resv, true, |
484 | true, | ||
485 | interruptible, | 526 | interruptible, |
486 | 30 * HZ); | 527 | 30 * HZ); |
487 | 528 | ||
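With the snapshot in place, ttm_bo_cleanup_refs_and_unlock() no longer calls ttm_bo_wait() while holding the reservation: it picks the right reservation object (the private &bo->ttm_resv once the BO is on the delayed-destroy list, bo->resv otherwise), performs a cheap RCU-based signaled test, and only falls back to a timed wait after dropping the locks. A sketch of that test-then-wait pattern, factored into a standalone helper for clarity; the return-value conventions follow reservation_object_wait_timeout_rcu(), and the caller is assumed to have dropped any spinlocks and the reservation before this sleeps:

	#include <linux/reservation.h>

	static int example_resv_wait_idle(struct reservation_object *resv,
					  bool interruptible, bool no_wait)
	{
		long lret;

		if (reservation_object_test_signaled_rcu(resv, true))
			return 0;			/* all fences already signaled */
		if (no_wait)
			return -EBUSY;			/* busy and we may not sleep */

		/* 'true' = wait on all fences, shared and exclusive alike */
		lret = reservation_object_wait_timeout_rcu(resv, true,
							   interruptible, 30 * HZ);
		if (lret < 0)
			return lret;			/* e.g. -ERESTARTSYS */
		return lret ? 0 : -EBUSY;		/* zero jiffies left means timeout */
	}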
@@ -505,13 +546,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, | |||
505 | spin_unlock(&glob->lru_lock); | 546 | spin_unlock(&glob->lru_lock); |
506 | return 0; | 547 | return 0; |
507 | } | 548 | } |
508 | |||
509 | /* | ||
510 | * remove sync_obj with ttm_bo_wait, the wait should be | ||
511 | * finished, and no new wait object should have been added. | ||
512 | */ | ||
513 | ret = ttm_bo_wait(bo, false, true); | ||
514 | WARN_ON(ret); | ||
515 | } | 549 | } |
516 | 550 | ||
517 | if (ret || unlikely(list_empty(&bo->ddestroy))) { | 551 | if (ret || unlikely(list_empty(&bo->ddestroy))) { |
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c index 90a6c0b03afc..a7c232dc39cb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c | |||
@@ -136,13 +136,12 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) | |||
136 | } | 136 | } |
137 | 137 | ||
138 | static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, | 138 | static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, |
139 | const char *prefix) | 139 | struct drm_printer *printer) |
140 | { | 140 | { |
141 | struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv; | 141 | struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv; |
142 | struct drm_printer p = drm_debug_printer(prefix); | ||
143 | 142 | ||
144 | spin_lock(&rman->lock); | 143 | spin_lock(&rman->lock); |
145 | drm_mm_print(&rman->mm, &p); | 144 | drm_mm_print(&rman->mm, printer); |
146 | spin_unlock(&rman->lock); | 145 | spin_unlock(&rman->lock); |
147 | } | 146 | } |
148 | 147 | ||
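The debug() hook now takes a struct drm_printer supplied by the caller instead of a prefix string, so the range manager no longer builds its own printer and the caller decides where the output goes (a debug printer, an info printer, or a debugfs seq_file printer). A sketch of the caller side this implies, consistent with the ttm_bo_mem_space_debug() context at the top of this section; the "[TTM] " prefix is only an example value:

	/* One printer is constructed by the core and handed to every
	 * manager's debug() hook. */
	struct drm_printer p = drm_debug_printer("[TTM] ");

	man->func->debug(man, &p);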
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index eeddc1e48409..871599826773 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c | |||
@@ -615,7 +615,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, | |||
615 | } else { | 615 | } else { |
616 | pr_err("Failed to fill pool (%p)\n", pool); | 616 | pr_err("Failed to fill pool (%p)\n", pool); |
617 | /* If we have any pages left put them to the pool. */ | 617 | /* If we have any pages left put them to the pool. */ |
618 | list_for_each_entry(p, &pool->list, lru) { | 618 | list_for_each_entry(p, &new_pages, lru) { |
619 | ++cpages; | 619 | ++cpages; |
620 | } | 620 | } |
621 | list_splice(&new_pages, &pool->list); | 621 | list_splice(&new_pages, &pool->list); |
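The ttm_page_alloc.c change is a standalone fix: when filling the pool partially fails, the pages that did get allocated are still sitting on the local new_pages list, and the recount has to walk that list; iterating pool->list at this point counts the wrong list, since the splice into the pool only happens on the following line. The corrected pattern, with the accounting update that presumably follows in the function:

	unsigned cpages = 0;
	struct page *p;

	/* Count only the pages that actually made it onto the local list;
	 * they have not been spliced into the pool yet. */
	list_for_each_entry(p, &new_pages, lru)
		++cpages;
	list_splice(&new_pages, &pool->list);
	pool->npages += cpages;		/* assumed pool accounting */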
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index e695d74eaa9f..cd389c5eaef5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c | |||
@@ -192,7 +192,7 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) | |||
192 | } | 192 | } |
193 | 193 | ||
194 | static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, | 194 | static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, |
195 | const char *prefix) | 195 | struct drm_printer *printer) |
196 | { | 196 | { |
197 | } | 197 | } |
198 | 198 | ||
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index d2b03d4a3c86..f2f9d88131f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | |||
@@ -157,9 +157,9 @@ static int vmw_gmrid_man_takedown(struct ttm_mem_type_manager *man) | |||
157 | } | 157 | } |
158 | 158 | ||
159 | static void vmw_gmrid_man_debug(struct ttm_mem_type_manager *man, | 159 | static void vmw_gmrid_man_debug(struct ttm_mem_type_manager *man, |
160 | const char *prefix) | 160 | struct drm_printer *printer) |
161 | { | 161 | { |
162 | pr_info("%s: No debug info available for the GMR id manager\n", prefix); | 162 | drm_printf(printer, "No debug info available for the GMR id manager\n"); |
163 | } | 163 | } |
164 | 164 | ||
165 | const struct ttm_mem_type_manager_func vmw_gmrid_manager_func = { | 165 | const struct ttm_mem_type_manager_func vmw_gmrid_manager_func = { |
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d30850e07936..5f821a9b3a1f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h | |||
@@ -229,13 +229,14 @@ struct ttm_mem_type_manager_func { | |||
229 | * struct ttm_mem_type_manager member debug | 229 | * struct ttm_mem_type_manager member debug |
230 | * | 230 | * |
231 | * @man: Pointer to a memory type manager. | 231 | * @man: Pointer to a memory type manager. |
232 | * @prefix: Prefix to be used in printout to identify the caller. | 232 | * @printer: Printer to use for dumping the memory manager state. |
233 | * | 233 | * |
234 | * This function is called to print out the state of the memory | 234 | * This function is called to print out the state of the memory |
235 | * type manager to aid debugging of out-of-memory conditions. | 235 | * type manager to aid debugging of out-of-memory conditions. |
236 | * It may not be called from within atomic context. | 236 | * It may not be called from within atomic context. |
237 | */ | 237 | */ |
238 | void (*debug)(struct ttm_mem_type_manager *man, const char *prefix); | 238 | void (*debug)(struct ttm_mem_type_manager *man, |
239 | struct drm_printer *printer); | ||
239 | }; | 240 | }; |
240 | 241 | ||
241 | /** | 242 | /** |
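For driver-private managers the prototype change means debug() implementations print through the supplied printer rather than via pr_info()/printk(), as the vmwgfx GMR id manager above now does. A hypothetical implementation against the new signature, assuming a drm_mm backed manager similar to the range manager earlier in this patch; struct example_manager and its fields are invented for illustration:

	#include <drm/drm_mm.h>
	#include <drm/drm_print.h>
	#include <drm/ttm/ttm_bo_driver.h>

	struct example_manager {		/* hypothetical driver-private state */
		struct drm_mm mm;
		spinlock_t lock;
		u64 usage;
	};

	static void example_man_debug(struct ttm_mem_type_manager *man,
				      struct drm_printer *printer)
	{
		struct example_manager *emgr = man->priv;

		spin_lock(&emgr->lock);
		drm_mm_print(&emgr->mm, printer);	/* dump allocator ranges */
		drm_printf(printer, "usage: %llu bytes\n",
			   (unsigned long long)emgr->usage);
		spin_unlock(&emgr->lock);
	}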