diff options
author:    Felix Kuehling <Felix.Kuehling@amd.com>  2018-11-20 21:44:27 -0500
committer: Alex Deucher <alexander.deucher@amd.com> 2018-12-07 18:14:00 -0500
commit:    b408a548846f2343716351d55a6c9af9e73ec32c (patch)
tree:      fc67d39a714e6276d23715f0f4fe6ed76ba90ef6 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parent:    1dde0ea95b782425b95455d487cb44991525a1d1 (diff)
drm/amdkfd: Add support for doorbell BOs
This allows user mode to map doorbell pages into GPUVM address space.
That way GPUs can submit to user mode queues (self-dispatch).
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a0a500d45886..be1ab43473c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -887,6 +887,24 @@ update_gpuvm_pte_failed:
 	return ret;
 }
 
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
+{
+	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+	if (!sg)
+		return NULL;
+	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+		kfree(sg);
+		return NULL;
+	}
+	sg->sgl->dma_address = addr;
+	sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+	sg->sgl->dma_length = size;
+#endif
+	return sg;
+}
+
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
 	struct amdgpu_vm *peer_vm;
@@ -1170,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	enum ttm_bo_type bo_type = ttm_bo_type_device;
+	struct sg_table *sg = NULL;
 	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_param bp;
@@ -1198,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		if (!offset || !*offset)
 			return -EINVAL;
 		user_addr = *offset;
+	} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
+		domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+		bo_type = ttm_bo_type_sg;
+		alloc_flags = 0;
+		if (size > UINT_MAX)
+			return -EINVAL;
+		sg = create_doorbell_sg(*offset, size);
+		if (!sg)
+			return -ENOMEM;
 	} else {
 		return -EINVAL;
 	}
 
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem)
-		return -ENOMEM;
+	if (!*mem) {
+		ret = -ENOMEM;
+		goto err;
+	}
 	INIT_LIST_HEAD(&(*mem)->bo_va_list);
 	mutex_init(&(*mem)->lock);
 	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
@@ -1237,7 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false);
+	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
 	if (ret) {
 		pr_debug("Insufficient system memory\n");
 		goto err_reserve_limit;
@@ -1251,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	bp.byte_align = byte_align;
 	bp.domain = alloc_domain;
 	bp.flags = alloc_flags;
-	bp.type = ttm_bo_type_device;
+	bp.type = bo_type;
 	bp.resv = NULL;
 	ret = amdgpu_bo_create(adev, &bp, &bo);
 	if (ret) {
@@ -1259,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 			domain_string(alloc_domain), ret);
 		goto err_bo_create;
 	}
+	if (bo_type == ttm_bo_type_sg) {
+		bo->tbo.sg = sg;
+		bo->tbo.ttm->sg = sg;
+	}
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
 	if (user_addr)
@@ -1290,10 +1326,15 @@ allocate_init_user_pages_failed:
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
 err_bo_create:
-	unreserve_mem_limit(adev, size, alloc_domain, false);
+	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
 err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
+err:
+	if (sg) {
+		sg_free_table(sg);
+		kfree(sg);
+	}
 	return ret;
 }
 
@@ -1363,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	/* Free the sync object */
 	amdgpu_sync_free(&mem->sync);
 
+	/* If the SG is not NULL, it's one we created for a doorbell
+	 * BO. We need to free it.
+	 */
+	if (mem->bo->tbo.sg) {
+		sg_free_table(mem->bo->tbo.sg);
+		kfree(mem->bo->tbo.sg);
+	}
+
 	/* Free the BO*/
 	amdgpu_bo_unref(&mem->bo);
 	mutex_destroy(&mem->lock);