aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
author: Felix Kuehling <Felix.Kuehling@amd.com>, 2018-11-20 21:44:27 -0500
committer: Alex Deucher <alexander.deucher@amd.com>, 2018-12-07 18:14:00 -0500
commit: b408a548846f2343716351d55a6c9af9e73ec32c (patch)
tree: fc67d39a714e6276d23715f0f4fe6ed76ba90ef6 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parent: 1dde0ea95b782425b95455d487cb44991525a1d1 (diff)
drm/amdkfd: Add support for doorbell BOs
This allows user mode to map doorbell pages into GPUVM address space. That way GPUs can submit to user mode queues (self-dispatch).

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 59
1 files changed, 54 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a0a500d45886..be1ab43473c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -887,6 +887,24 @@ update_gpuvm_pte_failed:
887 return ret; 887 return ret;
888} 888}
889 889
890static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
891{
892 struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
893
894 if (!sg)
895 return NULL;
896 if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
897 kfree(sg);
898 return NULL;
899 }
900 sg->sgl->dma_address = addr;
901 sg->sgl->length = size;
902#ifdef CONFIG_NEED_SG_DMA_LENGTH
903 sg->sgl->dma_length = size;
904#endif
905 return sg;
906}
907
890static int process_validate_vms(struct amdkfd_process_info *process_info) 908static int process_validate_vms(struct amdkfd_process_info *process_info)
891{ 909{
892 struct amdgpu_vm *peer_vm; 910 struct amdgpu_vm *peer_vm;
@@ -1170,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1170{ 1188{
1171 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1189 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1172 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; 1190 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1191 enum ttm_bo_type bo_type = ttm_bo_type_device;
1192 struct sg_table *sg = NULL;
1173 uint64_t user_addr = 0; 1193 uint64_t user_addr = 0;
1174 struct amdgpu_bo *bo; 1194 struct amdgpu_bo *bo;
1175 struct amdgpu_bo_param bp; 1195 struct amdgpu_bo_param bp;
@@ -1198,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1198 if (!offset || !*offset) 1218 if (!offset || !*offset)
1199 return -EINVAL; 1219 return -EINVAL;
1200 user_addr = *offset; 1220 user_addr = *offset;
1221 } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
1222 domain = AMDGPU_GEM_DOMAIN_GTT;
1223 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1224 bo_type = ttm_bo_type_sg;
1225 alloc_flags = 0;
1226 if (size > UINT_MAX)
1227 return -EINVAL;
1228 sg = create_doorbell_sg(*offset, size);
1229 if (!sg)
1230 return -ENOMEM;
1201 } else { 1231 } else {
1202 return -EINVAL; 1232 return -EINVAL;
1203 } 1233 }
1204 1234
1205 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); 1235 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1206 if (!*mem) 1236 if (!*mem) {
1207 return -ENOMEM; 1237 ret = -ENOMEM;
1238 goto err;
1239 }
1208 INIT_LIST_HEAD(&(*mem)->bo_va_list); 1240 INIT_LIST_HEAD(&(*mem)->bo_va_list);
1209 mutex_init(&(*mem)->lock); 1241 mutex_init(&(*mem)->lock);
1210 (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); 1242 (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
@@ -1237,7 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1237 1269
1238 amdgpu_sync_create(&(*mem)->sync); 1270 amdgpu_sync_create(&(*mem)->sync);
1239 1271
1240 ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false); 1272 ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
1241 if (ret) { 1273 if (ret) {
1242 pr_debug("Insufficient system memory\n"); 1274 pr_debug("Insufficient system memory\n");
1243 goto err_reserve_limit; 1275 goto err_reserve_limit;
@@ -1251,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1251 bp.byte_align = byte_align; 1283 bp.byte_align = byte_align;
1252 bp.domain = alloc_domain; 1284 bp.domain = alloc_domain;
1253 bp.flags = alloc_flags; 1285 bp.flags = alloc_flags;
1254 bp.type = ttm_bo_type_device; 1286 bp.type = bo_type;
1255 bp.resv = NULL; 1287 bp.resv = NULL;
1256 ret = amdgpu_bo_create(adev, &bp, &bo); 1288 ret = amdgpu_bo_create(adev, &bp, &bo);
1257 if (ret) { 1289 if (ret) {
@@ -1259,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1259 domain_string(alloc_domain), ret); 1291 domain_string(alloc_domain), ret);
1260 goto err_bo_create; 1292 goto err_bo_create;
1261 } 1293 }
1294 if (bo_type == ttm_bo_type_sg) {
1295 bo->tbo.sg = sg;
1296 bo->tbo.ttm->sg = sg;
1297 }
1262 bo->kfd_bo = *mem; 1298 bo->kfd_bo = *mem;
1263 (*mem)->bo = bo; 1299 (*mem)->bo = bo;
1264 if (user_addr) 1300 if (user_addr)
@@ -1290,10 +1326,15 @@ allocate_init_user_pages_failed:
1290 /* Don't unreserve system mem limit twice */ 1326 /* Don't unreserve system mem limit twice */
1291 goto err_reserve_limit; 1327 goto err_reserve_limit;
1292err_bo_create: 1328err_bo_create:
1293 unreserve_mem_limit(adev, size, alloc_domain, false); 1329 unreserve_mem_limit(adev, size, alloc_domain, !!sg);
1294err_reserve_limit: 1330err_reserve_limit:
1295 mutex_destroy(&(*mem)->lock); 1331 mutex_destroy(&(*mem)->lock);
1296 kfree(*mem); 1332 kfree(*mem);
1333err:
1334 if (sg) {
1335 sg_free_table(sg);
1336 kfree(sg);
1337 }
1297 return ret; 1338 return ret;
1298} 1339}
1299 1340
@@ -1363,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1363 /* Free the sync object */ 1404 /* Free the sync object */
1364 amdgpu_sync_free(&mem->sync); 1405 amdgpu_sync_free(&mem->sync);
1365 1406
1407 /* If the SG is not NULL, it's one we created for a doorbell
1408 * BO. We need to free it.
1409 */
1410 if (mem->bo->tbo.sg) {
1411 sg_free_table(mem->bo->tbo.sg);
1412 kfree(mem->bo->tbo.sg);
1413 }
1414
1366 /* Free the BO*/ 1415 /* Free the BO*/
1367 amdgpu_bo_unref(&mem->bo); 1416 amdgpu_bo_unref(&mem->bo);
1368 mutex_destroy(&mem->lock); 1417 mutex_destroy(&mem->lock);