path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c	496
1 file changed, 441 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 205da3ff9cd0..e93a0a237dc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -63,16 +63,44 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
63/* 63/*
64 * Global memory. 64 * Global memory.
65 */ 65 */
66
67/**
68 * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
69 * memory object
70 *
71 * @ref: Object for initialization.
72 *
73 * This is called by drm_global_item_ref() when an object is being
74 * initialized.
75 */
66static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) 76static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
67{ 77{
68 return ttm_mem_global_init(ref->object); 78 return ttm_mem_global_init(ref->object);
69} 79}
70 80
81/**
82 * amdgpu_ttm_mem_global_release - Drop reference to a memory object
83 *
84 * @ref: Object being removed
85 *
86 * This is called by drm_global_item_unref() when an object is being
87 * released.
88 */
71static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) 89static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
72{ 90{
73 ttm_mem_global_release(ref->object); 91 ttm_mem_global_release(ref->object);
74} 92}
75 93
94/**
95 * amdgpu_ttm_global_init - Initialize global TTM memory reference
96 * structures.
97 *
98 * @adev: AMDGPU device for which the global structures need to be
99 * registered.
100 *
101 * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
102 * during bring up.
103 */
76static int amdgpu_ttm_global_init(struct amdgpu_device *adev) 104static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
77{ 105{
78 struct drm_global_reference *global_ref; 106 struct drm_global_reference *global_ref;
@@ -80,7 +108,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
80 struct drm_sched_rq *rq; 108 struct drm_sched_rq *rq;
81 int r; 109 int r;
82 110
111 /* ensure reference is false in case init fails */
83 adev->mman.mem_global_referenced = false; 112 adev->mman.mem_global_referenced = false;
113
84 global_ref = &adev->mman.mem_global_ref; 114 global_ref = &adev->mman.mem_global_ref;
85 global_ref->global_type = DRM_GLOBAL_TTM_MEM; 115 global_ref->global_type = DRM_GLOBAL_TTM_MEM;
86 global_ref->size = sizeof(struct ttm_mem_global); 116 global_ref->size = sizeof(struct ttm_mem_global);
@@ -111,7 +141,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
111 ring = adev->mman.buffer_funcs_ring; 141 ring = adev->mman.buffer_funcs_ring;
112 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; 142 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
113 r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, 143 r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
114 rq, amdgpu_sched_jobs, NULL); 144 rq, NULL);
115 if (r) { 145 if (r) {
116 DRM_ERROR("Failed setting up TTM BO move run queue.\n"); 146 DRM_ERROR("Failed setting up TTM BO move run queue.\n");
117 goto error_entity; 147 goto error_entity;
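A note on the scheduler change above: drm_sched_entity_init() is now called with just the scheduler, the entity, the run queue and a final NULL argument, so the old amdgpu_sched_jobs limit is no longer passed in. Pulled out of the diff for readability, a minimal sketch of the buffer-move entity setup as it now reads:

	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct drm_sched_rq *rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
	int r;

	/* kernel-priority entity used for TTM buffer moves */
	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, rq, NULL);
	if (r)
		DRM_ERROR("Failed setting up TTM BO move run queue.\n");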
@@ -146,6 +176,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
146 return 0; 176 return 0;
147} 177}
148 178
179/**
180 * amdgpu_init_mem_type - Initialize a memory manager for a specific
181 * type of memory request.
182 *
183 * @bdev: The TTM BO device object (contains a reference to
184 * amdgpu_device)
185 * @type: The type of memory requested
 186 * @man: The memory type manager to set up for this memory type
187 *
 188 * This is called by ttm_bo_init_mm() when a memory type manager is
 189 * being initialized.
190 */
149static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, 191static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
150 struct ttm_mem_type_manager *man) 192 struct ttm_mem_type_manager *man)
151{ 193{
@@ -161,6 +203,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
161 man->default_caching = TTM_PL_FLAG_CACHED; 203 man->default_caching = TTM_PL_FLAG_CACHED;
162 break; 204 break;
163 case TTM_PL_TT: 205 case TTM_PL_TT:
206 /* GTT memory */
164 man->func = &amdgpu_gtt_mgr_func; 207 man->func = &amdgpu_gtt_mgr_func;
165 man->gpu_offset = adev->gmc.gart_start; 208 man->gpu_offset = adev->gmc.gart_start;
166 man->available_caching = TTM_PL_MASK_CACHING; 209 man->available_caching = TTM_PL_MASK_CACHING;
@@ -193,6 +236,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
193 return 0; 236 return 0;
194} 237}
195 238
239/**
240 * amdgpu_evict_flags - Compute placement flags
241 *
242 * @bo: The buffer object to evict
243 * @placement: Possible destination(s) for evicted BO
244 *
245 * Fill in placement data when ttm_bo_evict() is called
246 */
196static void amdgpu_evict_flags(struct ttm_buffer_object *bo, 247static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
197 struct ttm_placement *placement) 248 struct ttm_placement *placement)
198{ 249{
@@ -204,12 +255,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
204 .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM 255 .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
205 }; 256 };
206 257
258 /* Don't handle scatter gather BOs */
207 if (bo->type == ttm_bo_type_sg) { 259 if (bo->type == ttm_bo_type_sg) {
208 placement->num_placement = 0; 260 placement->num_placement = 0;
209 placement->num_busy_placement = 0; 261 placement->num_busy_placement = 0;
210 return; 262 return;
211 } 263 }
212 264
265 /* Object isn't an AMDGPU object so ignore */
213 if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { 266 if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
214 placement->placement = &placements; 267 placement->placement = &placements;
215 placement->busy_placement = &placements; 268 placement->busy_placement = &placements;
@@ -217,26 +270,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
217 placement->num_busy_placement = 1; 270 placement->num_busy_placement = 1;
218 return; 271 return;
219 } 272 }
273
220 abo = ttm_to_amdgpu_bo(bo); 274 abo = ttm_to_amdgpu_bo(bo);
221 switch (bo->mem.mem_type) { 275 switch (bo->mem.mem_type) {
222 case TTM_PL_VRAM: 276 case TTM_PL_VRAM:
223 if (!adev->mman.buffer_funcs_enabled) { 277 if (!adev->mman.buffer_funcs_enabled) {
278 /* Move to system memory */
224 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); 279 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
225 } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && 280 } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
226 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { 281 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
227 unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; 282 amdgpu_bo_in_cpu_visible_vram(abo)) {
228 struct drm_mm_node *node = bo->mem.mm_node;
229 unsigned long pages_left;
230
231 for (pages_left = bo->mem.num_pages;
232 pages_left;
233 pages_left -= node->size, node++) {
234 if (node->start < fpfn)
235 break;
236 }
237
238 if (!pages_left)
239 goto gtt;
240 283
241 /* Try evicting to the CPU inaccessible part of VRAM 284 /* Try evicting to the CPU inaccessible part of VRAM
242 * first, but only set GTT as busy placement, so this 285 * first, but only set GTT as busy placement, so this
@@ -245,12 +288,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
245 */ 288 */
246 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | 289 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
247 AMDGPU_GEM_DOMAIN_GTT); 290 AMDGPU_GEM_DOMAIN_GTT);
248 abo->placements[0].fpfn = fpfn; 291 abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
249 abo->placements[0].lpfn = 0; 292 abo->placements[0].lpfn = 0;
250 abo->placement.busy_placement = &abo->placements[1]; 293 abo->placement.busy_placement = &abo->placements[1];
251 abo->placement.num_busy_placement = 1; 294 abo->placement.num_busy_placement = 1;
252 } else { 295 } else {
253gtt: 296 /* Move to GTT memory */
254 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); 297 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
255 } 298 }
256 break; 299 break;
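For readers new to TTM placements: fpfn and lpfn are first/last page-frame-number bounds on where a placement may be satisfied (lpfn == 0 means no upper bound). The rewritten eviction path above keeps the BO in VRAM but only above the CPU-visible window, and lists GTT alone as the busy placement, so contention evicts this BO to GTT rather than pushing other BOs out of VRAM. Restated outside the diff markup:

	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
					      AMDGPU_GEM_DOMAIN_GTT);
	/* placement 0 (VRAM): only pages beyond the CPU-visible window */
	abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
	abo->placements[0].lpfn = 0;
	/* placement 1 (GTT) is the only fallback when VRAM is contended */
	abo->placement.busy_placement = &abo->placements[1];
	abo->placement.num_busy_placement = 1;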
@@ -261,6 +304,15 @@ gtt:
261 *placement = abo->placement; 304 *placement = abo->placement;
262} 305}
263 306
307/**
308 * amdgpu_verify_access - Verify access for a mmap call
309 *
310 * @bo: The buffer object to map
311 * @filp: The file pointer from the process performing the mmap
312 *
313 * This is called by ttm_bo_mmap() to verify whether a process
 314 * has the right to mmap a BO into its address space.
315 */
264static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) 316static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
265{ 317{
266 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); 318 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
@@ -278,6 +330,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
278 filp->private_data); 330 filp->private_data);
279} 331}
280 332
333/**
334 * amdgpu_move_null - Register memory for a buffer object
335 *
336 * @bo: The bo to assign the memory to
337 * @new_mem: The memory to be assigned.
338 *
 339 * Assign the memory from new_mem to the buffer object bo without
 340 * copying any data.
341 */
281static void amdgpu_move_null(struct ttm_buffer_object *bo, 342static void amdgpu_move_null(struct ttm_buffer_object *bo,
282 struct ttm_mem_reg *new_mem) 343 struct ttm_mem_reg *new_mem)
283{ 344{
@@ -288,6 +349,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
288 new_mem->mm_node = NULL; 349 new_mem->mm_node = NULL;
289} 350}
290 351
352/**
353 * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT
354 * buffer.
355 */
291static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, 356static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
292 struct drm_mm_node *mm_node, 357 struct drm_mm_node *mm_node,
293 struct ttm_mem_reg *mem) 358 struct ttm_mem_reg *mem)
@@ -302,9 +367,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
302} 367}
303 368
304/** 369/**
305 * amdgpu_find_mm_node - Helper function finds the drm_mm_node 370 * amdgpu_find_mm_node - Helper function finds the drm_mm_node
306 * corresponding to @offset. It also modifies the offset to be 371 * corresponding to @offset. It also modifies
307 * within the drm_mm_node returned 372 * the offset to be within the drm_mm_node
373 * returned
308 */ 374 */
309static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, 375static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
310 unsigned long *offset) 376 unsigned long *offset)
@@ -443,7 +509,12 @@ error:
443 return r; 509 return r;
444} 510}
445 511
446 512/**
513 * amdgpu_move_blit - Copy an entire buffer to another buffer
514 *
515 * This is a helper called by amdgpu_bo_move() and
 516 * amdgpu_move_vram_ram() to move buffers to and from VRAM.
517 */
447static int amdgpu_move_blit(struct ttm_buffer_object *bo, 518static int amdgpu_move_blit(struct ttm_buffer_object *bo,
448 bool evict, bool no_wait_gpu, 519 bool evict, bool no_wait_gpu,
449 struct ttm_mem_reg *new_mem, 520 struct ttm_mem_reg *new_mem,
@@ -478,6 +549,11 @@ error:
478 return r; 549 return r;
479} 550}
480 551
552/**
553 * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
554 *
555 * Called by amdgpu_bo_move().
556 */
481static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, 557static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
482 struct ttm_operation_ctx *ctx, 558 struct ttm_operation_ctx *ctx,
483 struct ttm_mem_reg *new_mem) 559 struct ttm_mem_reg *new_mem)
@@ -490,6 +566,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
490 int r; 566 int r;
491 567
492 adev = amdgpu_ttm_adev(bo->bdev); 568 adev = amdgpu_ttm_adev(bo->bdev);
569
570 /* create space/pages for new_mem in GTT space */
493 tmp_mem = *new_mem; 571 tmp_mem = *new_mem;
494 tmp_mem.mm_node = NULL; 572 tmp_mem.mm_node = NULL;
495 placement.num_placement = 1; 573 placement.num_placement = 1;
@@ -504,25 +582,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
504 return r; 582 return r;
505 } 583 }
506 584
585 /* set caching flags */
507 r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); 586 r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
508 if (unlikely(r)) { 587 if (unlikely(r)) {
509 goto out_cleanup; 588 goto out_cleanup;
510 } 589 }
511 590
591 /* Bind the memory to the GTT space */
512 r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); 592 r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
513 if (unlikely(r)) { 593 if (unlikely(r)) {
514 goto out_cleanup; 594 goto out_cleanup;
515 } 595 }
596
597 /* blit VRAM to GTT */
516 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); 598 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
517 if (unlikely(r)) { 599 if (unlikely(r)) {
518 goto out_cleanup; 600 goto out_cleanup;
519 } 601 }
602
603 /* move BO (in tmp_mem) to new_mem */
520 r = ttm_bo_move_ttm(bo, ctx, new_mem); 604 r = ttm_bo_move_ttm(bo, ctx, new_mem);
521out_cleanup: 605out_cleanup:
522 ttm_bo_mem_put(bo, &tmp_mem); 606 ttm_bo_mem_put(bo, &tmp_mem);
523 return r; 607 return r;
524} 608}
525 609
610/**
611 * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
612 *
613 * Called by amdgpu_bo_move().
614 */
526static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, 615static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
527 struct ttm_operation_ctx *ctx, 616 struct ttm_operation_ctx *ctx,
528 struct ttm_mem_reg *new_mem) 617 struct ttm_mem_reg *new_mem)
@@ -535,6 +624,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
535 int r; 624 int r;
536 625
537 adev = amdgpu_ttm_adev(bo->bdev); 626 adev = amdgpu_ttm_adev(bo->bdev);
627
628 /* make space in GTT for old_mem buffer */
538 tmp_mem = *new_mem; 629 tmp_mem = *new_mem;
539 tmp_mem.mm_node = NULL; 630 tmp_mem.mm_node = NULL;
540 placement.num_placement = 1; 631 placement.num_placement = 1;
@@ -548,10 +639,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
548 if (unlikely(r)) { 639 if (unlikely(r)) {
549 return r; 640 return r;
550 } 641 }
642
643 /* move/bind old memory to GTT space */
551 r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); 644 r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
552 if (unlikely(r)) { 645 if (unlikely(r)) {
553 goto out_cleanup; 646 goto out_cleanup;
554 } 647 }
648
649 /* copy to VRAM */
555 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); 650 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
556 if (unlikely(r)) { 651 if (unlikely(r)) {
557 goto out_cleanup; 652 goto out_cleanup;
@@ -561,6 +656,11 @@ out_cleanup:
561 return r; 656 return r;
562} 657}
563 658
659/**
660 * amdgpu_bo_move - Move a buffer object to a new memory location
661 *
662 * Called by ttm_bo_handle_move_mem()
663 */
564static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, 664static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
565 struct ttm_operation_ctx *ctx, 665 struct ttm_operation_ctx *ctx,
566 struct ttm_mem_reg *new_mem) 666 struct ttm_mem_reg *new_mem)
@@ -626,6 +726,11 @@ memcpy:
626 return 0; 726 return 0;
627} 727}
628 728
729/**
730 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
731 *
732 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
733 */
629static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) 734static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
630{ 735{
631 struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; 736 struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
@@ -695,7 +800,7 @@ struct amdgpu_ttm_tt {
695 struct ttm_dma_tt ttm; 800 struct ttm_dma_tt ttm;
696 u64 offset; 801 u64 offset;
697 uint64_t userptr; 802 uint64_t userptr;
698	struct mm_struct *usermm; 803	struct task_struct *usertask;
699 uint32_t userflags; 804 uint32_t userflags;
700 spinlock_t guptasklock; 805 spinlock_t guptasklock;
701 struct list_head guptasks; 806 struct list_head guptasks;
@@ -703,17 +808,29 @@ struct amdgpu_ttm_tt {
703 uint32_t last_set_pages; 808 uint32_t last_set_pages;
704}; 809};
705 810
811/**
 812 * amdgpu_ttm_tt_get_user_pages - Pin the pages of memory pointed to
 813 * by a userptr BO
 814 *
 815 * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
 816 * This wraps get_user_pages() so that the pages backing the user
 817 * memory are accessible to the device.
818 */
706int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) 819int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
707{ 820{
708 struct amdgpu_ttm_tt *gtt = (void *)ttm; 821 struct amdgpu_ttm_tt *gtt = (void *)ttm;
822 struct mm_struct *mm = gtt->usertask->mm;
709 unsigned int flags = 0; 823 unsigned int flags = 0;
710 unsigned pinned = 0; 824 unsigned pinned = 0;
711 int r; 825 int r;
712 826
827 if (!mm) /* Happens during process shutdown */
828 return -ESRCH;
829
713 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) 830 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
714 flags |= FOLL_WRITE; 831 flags |= FOLL_WRITE;
715 832
716 down_read(&current->mm->mmap_sem); 833 down_read(&mm->mmap_sem);
717 834
718 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { 835 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
719 /* check that we only use anonymous memory 836 /* check that we only use anonymous memory
@@ -721,13 +838,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
721 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; 838 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
722 struct vm_area_struct *vma; 839 struct vm_area_struct *vma;
723 840
724 vma = find_vma(gtt->usermm, gtt->userptr); 841 vma = find_vma(mm, gtt->userptr);
725 if (!vma || vma->vm_file || vma->vm_end < end) { 842 if (!vma || vma->vm_file || vma->vm_end < end) {
726 up_read(&current->mm->mmap_sem); 843 up_read(&mm->mmap_sem);
727 return -EPERM; 844 return -EPERM;
728 } 845 }
729 } 846 }
730 847
 848	/* pin the pages in batches until all of them are pinned */
731 do { 849 do {
732 unsigned num_pages = ttm->num_pages - pinned; 850 unsigned num_pages = ttm->num_pages - pinned;
733 uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; 851 uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
@@ -739,7 +857,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
739 list_add(&guptask.list, &gtt->guptasks); 857 list_add(&guptask.list, &gtt->guptasks);
740 spin_unlock(&gtt->guptasklock); 858 spin_unlock(&gtt->guptasklock);
741 859
742 r = get_user_pages(userptr, num_pages, flags, p, NULL); 860 if (mm == current->mm)
861 r = get_user_pages(userptr, num_pages, flags, p, NULL);
862 else
863 r = get_user_pages_remote(gtt->usertask,
864 mm, userptr, num_pages,
865 flags, p, NULL, NULL);
743 866
744 spin_lock(&gtt->guptasklock); 867 spin_lock(&gtt->guptasklock);
745 list_del(&guptask.list); 868 list_del(&guptask.list);
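Switching from a raw mm pointer to a task_struct reference lets this path pin pages on behalf of a process other than current. A condensed sketch of the resulting pattern, using only calls that appear in this function's hunks:

	struct mm_struct *mm = gtt->usertask->mm;
	long r;

	if (!mm)	/* happens during process shutdown */
		return -ESRCH;

	down_read(&mm->mmap_sem);
	if (mm == current->mm)
		r = get_user_pages(userptr, num_pages, flags, pages, NULL);
	else
		r = get_user_pages_remote(gtt->usertask, mm, userptr, num_pages,
					  flags, pages, NULL, NULL);
	up_read(&mm->mmap_sem);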
@@ -752,15 +875,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
752 875
753 } while (pinned < ttm->num_pages); 876 } while (pinned < ttm->num_pages);
754 877
755 up_read(&current->mm->mmap_sem); 878 up_read(&mm->mmap_sem);
756 return 0; 879 return 0;
757 880
758release_pages: 881release_pages:
759 release_pages(pages, pinned); 882 release_pages(pages, pinned);
760 up_read(&current->mm->mmap_sem); 883 up_read(&mm->mmap_sem);
761 return r; 884 return r;
762} 885}
763 886
887/**
 888 * amdgpu_ttm_tt_set_user_pages - Copy pages in, releasing old pages
 889 * as necessary.
890 *
891 * Called by amdgpu_cs_list_validate(). This creates the page list
892 * that backs user memory and will ultimately be mapped into the device
893 * address space.
894 */
764void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) 895void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
765{ 896{
766 struct amdgpu_ttm_tt *gtt = (void *)ttm; 897 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -775,6 +906,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
775 } 906 }
776} 907}
777 908
909/**
 910 * amdgpu_ttm_tt_mark_user_pages - Mark pages as dirty
911 *
912 * Called while unpinning userptr pages
913 */
778void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) 914void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
779{ 915{
780 struct amdgpu_ttm_tt *gtt = (void *)ttm; 916 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -793,7 +929,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
793 } 929 }
794} 930}
795 931
796/* prepare the sg table with the user pages */ 932/**
933 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the
934 * user pages
935 *
936 * Called by amdgpu_ttm_backend_bind()
 937 */
797static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) 938static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
798{ 939{
799 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 940 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -805,17 +946,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
805 enum dma_data_direction direction = write ? 946 enum dma_data_direction direction = write ?
806 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 947 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
807 948
949 /* Allocate an SG array and squash pages into it */
808 r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, 950 r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
809 ttm->num_pages << PAGE_SHIFT, 951 ttm->num_pages << PAGE_SHIFT,
810 GFP_KERNEL); 952 GFP_KERNEL);
811 if (r) 953 if (r)
812 goto release_sg; 954 goto release_sg;
813 955
956 /* Map SG to device */
814 r = -ENOMEM; 957 r = -ENOMEM;
815 nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 958 nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
816 if (nents != ttm->sg->nents) 959 if (nents != ttm->sg->nents)
817 goto release_sg; 960 goto release_sg;
818 961
962 /* convert SG to linear array of pages and dma addresses */
819 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, 963 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
820 gtt->ttm.dma_address, ttm->num_pages); 964 gtt->ttm.dma_address, ttm->num_pages);
821 965
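The comments added to amdgpu_ttm_tt_pin_userptr() outline the userptr mapping flow: gather the pinned pages into an sg_table, DMA-map it, then flatten it back into the page and dma_address arrays TTM consumes. The same three steps without the diff noise:

	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      ttm->num_pages << PAGE_SHIFT, GFP_KERNEL);
	if (r)
		goto release_sg;

	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
	if (nents != ttm->sg->nents)
		goto release_sg;

	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
					 gtt->ttm.dma_address, ttm->num_pages);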
@@ -826,6 +970,9 @@ release_sg:
826 return r; 970 return r;
827} 971}
828 972
973/**
974 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
975 */
829static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) 976static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
830{ 977{
831 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 978 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -839,14 +986,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
839 if (!ttm->sg->sgl) 986 if (!ttm->sg->sgl)
840 return; 987 return;
841 988
842 /* free the sg table and pages again */ 989 /* unmap the pages mapped to the device */
843 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 990 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
844 991
992 /* mark the pages as dirty */
845 amdgpu_ttm_tt_mark_user_pages(ttm); 993 amdgpu_ttm_tt_mark_user_pages(ttm);
846 994
847 sg_free_table(ttm->sg); 995 sg_free_table(ttm->sg);
848} 996}
849 997
998int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
999 struct ttm_buffer_object *tbo,
1000 uint64_t flags)
1001{
1002 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
1003 struct ttm_tt *ttm = tbo->ttm;
1004 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1005 int r;
1006
1007 if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
1008 uint64_t page_idx = 1;
1009
1010 r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
1011 ttm->pages, gtt->ttm.dma_address, flags);
1012 if (r)
1013 goto gart_bind_fail;
1014
1015 /* Patch mtype of the second part BO */
1016 flags &= ~AMDGPU_PTE_MTYPE_MASK;
1017 flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
1018
1019 r = amdgpu_gart_bind(adev,
1020 gtt->offset + (page_idx << PAGE_SHIFT),
1021 ttm->num_pages - page_idx,
1022 &ttm->pages[page_idx],
1023 &(gtt->ttm.dma_address[page_idx]), flags);
1024 } else {
1025 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
1026 ttm->pages, gtt->ttm.dma_address, flags);
1027 }
1028
1029gart_bind_fail:
1030 if (r)
1031 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
1032 ttm->num_pages, gtt->offset);
1033
1034 return r;
1035}
1036
1037/**
1038 * amdgpu_ttm_backend_bind - Bind GTT memory
1039 *
1040 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
1041 * This handles binding GTT memory to the device address space.
1042 */
850static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, 1043static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
851 struct ttm_mem_reg *bo_mem) 1044 struct ttm_mem_reg *bo_mem)
852{ 1045{
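The new amdgpu_ttm_gart_bind() helper above binds a GFX9 MQD BO in two pieces so the second part can be mapped with a different memory type. The MTYPE patching itself is a simple mask-and-set on the PTE flags; a minimal sketch using the macros from the hunk (page_idx fixed at 1, as above):

	/* first page keeps the caller's flags unchanged */
	r = amdgpu_gart_bind(adev, gtt->offset, 1,
			     ttm->pages, gtt->ttm.dma_address, flags);

	/* remaining pages are rebound with MTYPE_NC */
	flags &= ~AMDGPU_PTE_MTYPE_MASK;
	flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
	r = amdgpu_gart_bind(adev, gtt->offset + PAGE_SIZE, ttm->num_pages - 1,
			     &ttm->pages[1], &gtt->ttm.dma_address[1], flags);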
@@ -877,7 +1070,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
877 return 0; 1070 return 0;
878 } 1071 }
879 1072
1073 /* compute PTE flags relevant to this BO memory */
880 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); 1074 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
1075
1076 /* bind pages into GART page tables */
881 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; 1077 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
882 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, 1078 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
883 ttm->pages, gtt->ttm.dma_address, flags); 1079 ttm->pages, gtt->ttm.dma_address, flags);
@@ -888,6 +1084,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
888 return r; 1084 return r;
889} 1085}
890 1086
1087/**
1088 * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
1089 */
891int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) 1090int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
892{ 1091{
893 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); 1092 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -903,6 +1102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
903 amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) 1102 amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
904 return 0; 1103 return 0;
905 1104
1105 /* allocate GTT space */
906 tmp = bo->mem; 1106 tmp = bo->mem;
907 tmp.mm_node = NULL; 1107 tmp.mm_node = NULL;
908 placement.num_placement = 1; 1108 placement.num_placement = 1;
@@ -918,10 +1118,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
918 if (unlikely(r)) 1118 if (unlikely(r))
919 return r; 1119 return r;
920 1120
1121 /* compute PTE flags for this buffer object */
921 flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); 1122 flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
1123
1124 /* Bind pages */
922 gtt->offset = (u64)tmp.start << PAGE_SHIFT; 1125 gtt->offset = (u64)tmp.start << PAGE_SHIFT;
923 r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, 1126 r = amdgpu_ttm_gart_bind(adev, bo, flags);
924 bo->ttm->pages, gtt->ttm.dma_address, flags);
925 if (unlikely(r)) { 1127 if (unlikely(r)) {
926 ttm_bo_mem_put(bo, &tmp); 1128 ttm_bo_mem_put(bo, &tmp);
927 return r; 1129 return r;
@@ -935,31 +1137,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
935 return 0; 1137 return 0;
936} 1138}
937 1139
1140/**
1141 * amdgpu_ttm_recover_gart - Rebind GTT pages
1142 *
1143 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
1144 * rebind GTT pages during a GPU reset.
1145 */
938int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) 1146int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
939{ 1147{
940 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); 1148 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
941 struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
942 uint64_t flags; 1149 uint64_t flags;
943 int r; 1150 int r;
944 1151
945 if (!gtt) 1152 if (!tbo->ttm)
946 return 0; 1153 return 0;
947 1154
948 flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem); 1155 flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
949 r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, 1156 r = amdgpu_ttm_gart_bind(adev, tbo, flags);
950 gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); 1157
951 if (r)
952 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
953 gtt->ttm.ttm.num_pages, gtt->offset);
954 return r; 1158 return r;
955} 1159}
956 1160
1161/**
1162 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
1163 *
1164 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
1165 * ttm_tt_destroy().
1166 */
957static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) 1167static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
958{ 1168{
959 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 1169 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
960 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1170 struct amdgpu_ttm_tt *gtt = (void *)ttm;
961 int r; 1171 int r;
962 1172
1173 /* if the pages have userptr pinning then clear that first */
963 if (gtt->userptr) 1174 if (gtt->userptr)
964 amdgpu_ttm_tt_unpin_userptr(ttm); 1175 amdgpu_ttm_tt_unpin_userptr(ttm);
965 1176
@@ -978,6 +1189,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
978{ 1189{
979 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1190 struct amdgpu_ttm_tt *gtt = (void *)ttm;
980 1191
1192 if (gtt->usertask)
1193 put_task_struct(gtt->usertask);
1194
981 ttm_dma_tt_fini(&gtt->ttm); 1195 ttm_dma_tt_fini(&gtt->ttm);
982 kfree(gtt); 1196 kfree(gtt);
983} 1197}
@@ -988,6 +1202,13 @@ static struct ttm_backend_func amdgpu_backend_func = {
988 .destroy = &amdgpu_ttm_backend_destroy, 1202 .destroy = &amdgpu_ttm_backend_destroy,
989}; 1203};
990 1204
1205/**
1206 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
1207 *
1208 * @bo: The buffer object to create a GTT ttm_tt object around
1209 *
1210 * Called by ttm_tt_create().
1211 */
991static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, 1212static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
992 uint32_t page_flags) 1213 uint32_t page_flags)
993{ 1214{
@@ -1001,6 +1222,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1001 return NULL; 1222 return NULL;
1002 } 1223 }
1003 gtt->ttm.ttm.func = &amdgpu_backend_func; 1224 gtt->ttm.ttm.func = &amdgpu_backend_func;
1225
1226 /* allocate space for the uninitialized page entries */
1004 if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) { 1227 if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
1005 kfree(gtt); 1228 kfree(gtt);
1006 return NULL; 1229 return NULL;
@@ -1008,6 +1231,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1008 return &gtt->ttm.ttm; 1231 return &gtt->ttm.ttm;
1009} 1232}
1010 1233
1234/**
1235 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
1236 *
1237 * Map the pages of a ttm_tt object to an address space visible
1238 * to the underlying device.
1239 */
1011static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, 1240static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1012 struct ttm_operation_ctx *ctx) 1241 struct ttm_operation_ctx *ctx)
1013{ 1242{
@@ -1015,6 +1244,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1015 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1244 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1016 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); 1245 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
1017 1246
1247 /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1018 if (gtt && gtt->userptr) { 1248 if (gtt && gtt->userptr) {
1019 ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); 1249 ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1020 if (!ttm->sg) 1250 if (!ttm->sg)
@@ -1039,9 +1269,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1039 } 1269 }
1040#endif 1270#endif
1041 1271
1272 /* fall back to generic helper to populate the page array
1273 * and map them to the device */
1042 return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx); 1274 return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
1043} 1275}
1044 1276
1277/**
1278 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
1279 *
1280 * Unmaps pages of a ttm_tt object from the device address space and
1281 * unpopulates the page array backing it.
1282 */
1045static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) 1283static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1046{ 1284{
1047 struct amdgpu_device *adev; 1285 struct amdgpu_device *adev;
@@ -1067,9 +1305,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1067 } 1305 }
1068#endif 1306#endif
1069 1307
1308 /* fall back to generic helper to unmap and unpopulate array */
1070 ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm); 1309 ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
1071} 1310}
1072 1311
1312/**
1313 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt
1314 * for the current task
1315 *
1316 * @ttm: The ttm_tt object to bind this userptr object to
1317 * @addr: The address in the current tasks VM space to use
1318 * @flags: Requirements of userptr object.
1319 *
1320 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
1321 * to current task
1322 */
1073int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, 1323int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1074 uint32_t flags) 1324 uint32_t flags)
1075{ 1325{
@@ -1079,8 +1329,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1079 return -EINVAL; 1329 return -EINVAL;
1080 1330
1081 gtt->userptr = addr; 1331 gtt->userptr = addr;
1082 gtt->usermm = current->mm;
1083 gtt->userflags = flags; 1332 gtt->userflags = flags;
1333
1334 if (gtt->usertask)
1335 put_task_struct(gtt->usertask);
1336 gtt->usertask = current->group_leader;
1337 get_task_struct(gtt->usertask);
1338
1084 spin_lock_init(&gtt->guptasklock); 1339 spin_lock_init(&gtt->guptasklock);
1085 INIT_LIST_HEAD(&gtt->guptasks); 1340 INIT_LIST_HEAD(&gtt->guptasks);
1086 atomic_set(&gtt->mmu_invalidations, 0); 1341 atomic_set(&gtt->mmu_invalidations, 0);
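Keeping a task_struct reference alive is what makes the remote pinning earlier in this file safe. A minimal sketch of the reference discipline introduced here, matching the calls in this hunk and in amdgpu_ttm_backend_destroy():

	/* set_userptr: drop any previously tracked task, then pin the new one */
	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

	/* backend_destroy: release the reference when the ttm_tt goes away */
	if (gtt->usertask)
		put_task_struct(gtt->usertask);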
@@ -1089,6 +1344,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1089 return 0; 1344 return 0;
1090} 1345}
1091 1346
1347/**
 1348 * amdgpu_ttm_tt_get_usermm - Return the mm_struct backing a userptr ttm_tt
1349 */
1092struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) 1350struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1093{ 1351{
1094 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1352 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1096,9 +1354,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1096 if (gtt == NULL) 1354 if (gtt == NULL)
1097 return NULL; 1355 return NULL;
1098 1356
1099 return gtt->usermm; 1357 if (gtt->usertask == NULL)
1358 return NULL;
1359
1360 return gtt->usertask->mm;
1100} 1361}
1101 1362
1363/**
 1364 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies
1365 * inside an address range for the
1366 * current task.
1367 *
1368 */
1102bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, 1369bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1103 unsigned long end) 1370 unsigned long end)
1104{ 1371{
@@ -1109,10 +1376,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1109 if (gtt == NULL || !gtt->userptr) 1376 if (gtt == NULL || !gtt->userptr)
1110 return false; 1377 return false;
1111 1378
1379 /* Return false if no part of the ttm_tt object lies within
1380 * the range
1381 */
1112 size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; 1382 size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
1113 if (gtt->userptr > end || gtt->userptr + size <= start) 1383 if (gtt->userptr > end || gtt->userptr + size <= start)
1114 return false; 1384 return false;
1115 1385
1386 /* Search the lists of tasks that hold this mapping and see
1387 * if current is one of them. If it is return false.
1388 */
1116 spin_lock(&gtt->guptasklock); 1389 spin_lock(&gtt->guptasklock);
1117 list_for_each_entry(entry, &gtt->guptasks, list) { 1390 list_for_each_entry(entry, &gtt->guptasks, list) {
1118 if (entry->task == current) { 1391 if (entry->task == current) {
@@ -1127,6 +1400,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1127 return true; 1400 return true;
1128} 1401}
1129 1402
1403/**
1404 * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been
1405 * invalidated?
1406 */
1130bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, 1407bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1131 int *last_invalidated) 1408 int *last_invalidated)
1132{ 1409{
@@ -1137,6 +1414,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1137 return prev_invalidated != *last_invalidated; 1414 return prev_invalidated != *last_invalidated;
1138} 1415}
1139 1416
1417/**
1418 * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this
1419 * ttm_tt object been invalidated
1420 * since the last time they've
1421 * been set?
1422 */
1140bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) 1423bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1141{ 1424{
1142 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1425 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1147,6 +1430,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1147 return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; 1430 return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
1148} 1431}
1149 1432
1433/**
1434 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
1435 */
1150bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) 1436bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1151{ 1437{
1152 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1438 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1157,6 +1443,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1157 return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); 1443 return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1158} 1444}
1159 1445
1446/**
1447 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
1448 *
1449 * @ttm: The ttm_tt object to compute the flags for
1450 * @mem: The memory registry backing this ttm_tt object
1451 */
1160uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, 1452uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1161 struct ttm_mem_reg *mem) 1453 struct ttm_mem_reg *mem)
1162{ 1454{
@@ -1181,6 +1473,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1181 return flags; 1473 return flags;
1182} 1474}
1183 1475
1476/**
1477 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict
1478 * a buffer object.
1479 *
1480 * Return true if eviction is sensible. Called by
1481 * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
1482 * which tries to evict buffer objects until it can find space
1483 * for a new object and by ttm_bo_force_list_clean() which is
1484 * used to clean out a memory space.
1485 */
1184static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, 1486static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1185 const struct ttm_place *place) 1487 const struct ttm_place *place)
1186{ 1488{
@@ -1227,6 +1529,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1227 return ttm_bo_eviction_valuable(bo, place); 1529 return ttm_bo_eviction_valuable(bo, place);
1228} 1530}
1229 1531
1532/**
1533 * amdgpu_ttm_access_memory - Read or Write memory that backs a
1534 * buffer object.
1535 *
1536 * @bo: The buffer object to read/write
1537 * @offset: Offset into buffer object
1538 * @buf: Secondary buffer to write/read from
1539 * @len: Length in bytes of access
1540 * @write: true if writing
1541 *
1542 * This is used to access VRAM that backs a buffer object via MMIO
1543 * access for debugging purposes.
1544 */
1230static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, 1545static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1231 unsigned long offset, 1546 unsigned long offset,
1232 void *buf, int len, int write) 1547 void *buf, int len, int write)
@@ -1329,6 +1644,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1329static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) 1644static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1330{ 1645{
1331 struct ttm_operation_ctx ctx = { false, false }; 1646 struct ttm_operation_ctx ctx = { false, false };
1647 struct amdgpu_bo_param bp;
1332 int r = 0; 1648 int r = 0;
1333 int i; 1649 int i;
1334 u64 vram_size = adev->gmc.visible_vram_size; 1650 u64 vram_size = adev->gmc.visible_vram_size;
@@ -1336,17 +1652,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1336 u64 size = adev->fw_vram_usage.size; 1652 u64 size = adev->fw_vram_usage.size;
1337 struct amdgpu_bo *bo; 1653 struct amdgpu_bo *bo;
1338 1654
1655 memset(&bp, 0, sizeof(bp));
1656 bp.size = adev->fw_vram_usage.size;
1657 bp.byte_align = PAGE_SIZE;
1658 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
1659 bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1660 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1661 bp.type = ttm_bo_type_kernel;
1662 bp.resv = NULL;
1339 adev->fw_vram_usage.va = NULL; 1663 adev->fw_vram_usage.va = NULL;
1340 adev->fw_vram_usage.reserved_bo = NULL; 1664 adev->fw_vram_usage.reserved_bo = NULL;
1341 1665
1342 if (adev->fw_vram_usage.size > 0 && 1666 if (adev->fw_vram_usage.size > 0 &&
1343 adev->fw_vram_usage.size <= vram_size) { 1667 adev->fw_vram_usage.size <= vram_size) {
1344 1668
1345 r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, 1669 r = amdgpu_bo_create(adev, &bp,
1346 AMDGPU_GEM_DOMAIN_VRAM,
1347 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1348 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1349 ttm_bo_type_kernel, NULL,
1350 &adev->fw_vram_usage.reserved_bo); 1670 &adev->fw_vram_usage.reserved_bo);
1351 if (r) 1671 if (r)
1352 goto error_create; 1672 goto error_create;
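amdgpu_bo_create() now takes a single struct amdgpu_bo_param rather than a long positional argument list. Collected from the hunk above, the firmware-reserved VRAM BO is created roughly like this:

	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = adev->fw_vram_usage.size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
		   AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;

	r = amdgpu_bo_create(adev, &bp, &adev->fw_vram_usage.reserved_bo);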
@@ -1398,13 +1718,22 @@ error_create:
1398 adev->fw_vram_usage.reserved_bo = NULL; 1718 adev->fw_vram_usage.reserved_bo = NULL;
1399 return r; 1719 return r;
1400} 1720}
1401 1721/**
1722 * amdgpu_ttm_init - Init the memory management (ttm) as well as
1723 * various gtt/vram related fields.
1724 *
1725 * This initializes all of the memory space pools that the TTM layer
1726 * will need such as the GTT space (system memory mapped to the device),
1727 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
1728 * can be mapped per VMID.
1729 */
1402int amdgpu_ttm_init(struct amdgpu_device *adev) 1730int amdgpu_ttm_init(struct amdgpu_device *adev)
1403{ 1731{
1404 uint64_t gtt_size; 1732 uint64_t gtt_size;
1405 int r; 1733 int r;
1406 u64 vis_vram_limit; 1734 u64 vis_vram_limit;
1407 1735
1736 /* initialize global references for vram/gtt */
1408 r = amdgpu_ttm_global_init(adev); 1737 r = amdgpu_ttm_global_init(adev);
1409 if (r) { 1738 if (r) {
1410 return r; 1739 return r;
@@ -1425,6 +1754,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1425 /* We opt to avoid OOM on system pages allocations */ 1754 /* We opt to avoid OOM on system pages allocations */
1426 adev->mman.bdev.no_retry = true; 1755 adev->mman.bdev.no_retry = true;
1427 1756
1757 /* Initialize VRAM pool with all of VRAM divided into pages */
1428 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, 1758 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1429 adev->gmc.real_vram_size >> PAGE_SHIFT); 1759 adev->gmc.real_vram_size >> PAGE_SHIFT);
1430 if (r) { 1760 if (r) {
@@ -1454,15 +1784,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1454 return r; 1784 return r;
1455 } 1785 }
1456 1786
1457 r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, 1787 /* allocate memory as required for VGA
1458 AMDGPU_GEM_DOMAIN_VRAM, 1788 * This is used for VGA emulation and pre-OS scanout buffers to
1459 &adev->stolen_vga_memory, 1789 * avoid display artifacts while transitioning between pre-OS
1460 NULL, NULL); 1790 * and driver. */
1461 if (r) 1791 if (adev->gmc.stolen_size) {
1462 return r; 1792 r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
1793 AMDGPU_GEM_DOMAIN_VRAM,
1794 &adev->stolen_vga_memory,
1795 NULL, NULL);
1796 if (r)
1797 return r;
1798 }
1463 DRM_INFO("amdgpu: %uM of VRAM memory ready\n", 1799 DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1464 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); 1800 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1465 1801
 1802	/* Compute GTT size, either based on 3/4 of the RAM size
1803 * or whatever the user passed on module init */
1466 if (amdgpu_gtt_size == -1) { 1804 if (amdgpu_gtt_size == -1) {
1467 struct sysinfo si; 1805 struct sysinfo si;
1468 1806
@@ -1473,6 +1811,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1473 } 1811 }
1474 else 1812 else
1475 gtt_size = (uint64_t)amdgpu_gtt_size << 20; 1813 gtt_size = (uint64_t)amdgpu_gtt_size << 20;
1814
1815 /* Initialize GTT memory pool */
1476 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); 1816 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
1477 if (r) { 1817 if (r) {
1478 DRM_ERROR("Failed initializing GTT heap.\n"); 1818 DRM_ERROR("Failed initializing GTT heap.\n");
@@ -1481,6 +1821,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1481 DRM_INFO("amdgpu: %uM of GTT memory ready.\n", 1821 DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
1482 (unsigned)(gtt_size / (1024 * 1024))); 1822 (unsigned)(gtt_size / (1024 * 1024)));
1483 1823
1824 /* Initialize various on-chip memory pools */
1484 adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; 1825 adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
1485 adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; 1826 adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
1486 adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; 1827 adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
@@ -1520,6 +1861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1520 } 1861 }
1521 } 1862 }
1522 1863
1864 /* Register debugfs entries for amdgpu_ttm */
1523 r = amdgpu_ttm_debugfs_init(adev); 1865 r = amdgpu_ttm_debugfs_init(adev);
1524 if (r) { 1866 if (r) {
1525 DRM_ERROR("Failed to init debugfs\n"); 1867 DRM_ERROR("Failed to init debugfs\n");
@@ -1528,13 +1870,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1528 return 0; 1870 return 0;
1529} 1871}
1530 1872
1873/**
1874 * amdgpu_ttm_late_init - Handle any late initialization for
1875 * amdgpu_ttm
1876 */
1877void amdgpu_ttm_late_init(struct amdgpu_device *adev)
1878{
1879 /* return the VGA stolen memory (if any) back to VRAM */
1880 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
1881}
1882
1883/**
1884 * amdgpu_ttm_fini - De-initialize the TTM memory pools
1885 */
1531void amdgpu_ttm_fini(struct amdgpu_device *adev) 1886void amdgpu_ttm_fini(struct amdgpu_device *adev)
1532{ 1887{
1533 if (!adev->mman.initialized) 1888 if (!adev->mman.initialized)
1534 return; 1889 return;
1535 1890
1536 amdgpu_ttm_debugfs_fini(adev); 1891 amdgpu_ttm_debugfs_fini(adev);
1537 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
1538 amdgpu_ttm_fw_reserve_vram_fini(adev); 1892 amdgpu_ttm_fw_reserve_vram_fini(adev);
1539 if (adev->mman.aper_base_kaddr) 1893 if (adev->mman.aper_base_kaddr)
1540 iounmap(adev->mman.aper_base_kaddr); 1894 iounmap(adev->mman.aper_base_kaddr);
@@ -1856,6 +2210,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
1856#endif 2210#endif
1857}; 2211};
1858 2212
2213/**
2214 * amdgpu_ttm_vram_read - Linear read access to VRAM
2215 *
2216 * Accesses VRAM via MMIO for debugging purposes.
2217 */
1859static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, 2218static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1860 size_t size, loff_t *pos) 2219 size_t size, loff_t *pos)
1861{ 2220{
@@ -1895,6 +2254,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1895 return result; 2254 return result;
1896} 2255}
1897 2256
2257/**
2258 * amdgpu_ttm_vram_write - Linear write access to VRAM
2259 *
2260 * Accesses VRAM via MMIO for debugging purposes.
2261 */
1898static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, 2262static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
1899 size_t size, loff_t *pos) 2263 size_t size, loff_t *pos)
1900{ 2264{
@@ -1943,6 +2307,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = {
1943 2307
1944#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 2308#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1945 2309
2310/**
2311 * amdgpu_ttm_gtt_read - Linear read access to GTT memory
2312 */
1946static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, 2313static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
1947 size_t size, loff_t *pos) 2314 size_t size, loff_t *pos)
1948{ 2315{
@@ -1990,6 +2357,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
1990 2357
1991#endif 2358#endif
1992 2359
2360/**
2361 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
2362 *
2363 * This function is used to read memory that has been mapped to the
2364 * GPU and the known addresses are not physical addresses but instead
2365 * bus addresses (e.g., what you'd put in an IB or ring buffer).
2366 */
1993static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, 2367static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
1994 size_t size, loff_t *pos) 2368 size_t size, loff_t *pos)
1995{ 2369{
@@ -1998,6 +2372,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
1998 ssize_t result = 0; 2372 ssize_t result = 0;
1999 int r; 2373 int r;
2000 2374
2375 /* retrieve the IOMMU domain if any for this device */
2001 dom = iommu_get_domain_for_dev(adev->dev); 2376 dom = iommu_get_domain_for_dev(adev->dev);
2002 2377
2003 while (size) { 2378 while (size) {
@@ -2010,6 +2385,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2010 2385
2011 bytes = bytes < size ? bytes : size; 2386 bytes = bytes < size ? bytes : size;
2012 2387
2388 /* Translate the bus address to a physical address. If
2389 * the domain is NULL it means there is no IOMMU active
2390 * and the address translation is the identity
2391 */
2013 addr = dom ? iommu_iova_to_phys(dom, addr) : addr; 2392 addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2014 2393
2015 pfn = addr >> PAGE_SHIFT; 2394 pfn = addr >> PAGE_SHIFT;
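As the added comments note, the iomem debugfs file works on bus (IOVA) addresses, so each access is translated through the IOMMU when a domain is attached. The translation step, isolated from the read loop (addr here is the bus address derived from the file offset):

	struct iommu_domain *dom = iommu_get_domain_for_dev(adev->dev);
	phys_addr_t phys;

	/* identity mapping when no IOMMU is active for this device */
	phys = dom ? iommu_iova_to_phys(dom, addr) : addr;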
@@ -2034,6 +2413,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2034 return result; 2413 return result;
2035} 2414}
2036 2415
2416/**
2417 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
2418 *
2419 * This function is used to write memory that has been mapped to the
2420 * GPU and the known addresses are not physical addresses but instead
2421 * bus addresses (e.g., what you'd put in an IB or ring buffer).
2422 */
2037static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, 2423static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
2038 size_t size, loff_t *pos) 2424 size_t size, loff_t *pos)
2039{ 2425{