diff options
author | Eric Huang <JinhuiEric.Huang@amd.com> | 2018-09-05 11:46:14 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2018-11-20 14:01:54 -0500 |
commit | 5d240da93edc29adb68320c5e475dc9c7fcad5dd (patch) | |
tree | 44551f46cc7a22198a922e187b8e28ceb16bf1a7 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | |
parent | 0f04e538580c086046e3b31cbc7ef4307a5b771d (diff) |
drm/amdkfd: change system memory overcommit limit
This improves the system memory limit by:
1. replacing the userptr limit with a total memory limit that
counts both TTM memory usage and userptr usage.
2. counting the acc size for all BOs.
Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 99 |
1 files changed, 58 insertions, 41 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 84e4c1e4d109..f3129b912714 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | |||
@@ -46,9 +46,9 @@ | |||
46 | /* Impose limit on how much memory KFD can use */ | 46 | /* Impose limit on how much memory KFD can use */ |
47 | static struct { | 47 | static struct { |
48 | uint64_t max_system_mem_limit; | 48 | uint64_t max_system_mem_limit; |
49 | uint64_t max_userptr_mem_limit; | 49 | uint64_t max_ttm_mem_limit; |
50 | int64_t system_mem_used; | 50 | int64_t system_mem_used; |
51 | int64_t userptr_mem_used; | 51 | int64_t ttm_mem_used; |
52 | spinlock_t mem_limit_lock; | 52 | spinlock_t mem_limit_lock; |
53 | } kfd_mem_limit; | 53 | } kfd_mem_limit; |
54 | 54 | ||
@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, | |||
90 | } | 90 | } |
91 | 91 | ||
92 | /* Set memory usage limits. Current, limits are | 92 | /* Set memory usage limits. Current, limits are |
93 | * System (kernel) memory - 3/8th System RAM | 93 | * System (TTM + userptr) memory - 3/4th System RAM |
94 | * Userptr memory - 3/4th System RAM | 94 | * TTM memory - 3/8th System RAM |
95 | */ | 95 | */ |
96 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | 96 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) |
97 | { | 97 | { |
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | |||
103 | mem *= si.mem_unit; | 103 | mem *= si.mem_unit; |
104 | 104 | ||
105 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); | 105 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); |
106 | kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); | 106 | kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2); |
107 | kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); | 107 | kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); |
108 | pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", | 108 | pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", |
109 | (kfd_mem_limit.max_system_mem_limit >> 20), | 109 | (kfd_mem_limit.max_system_mem_limit >> 20), |
110 | (kfd_mem_limit.max_userptr_mem_limit >> 20)); | 110 | (kfd_mem_limit.max_ttm_mem_limit >> 20)); |
111 | } | 111 | } |
112 | 112 | ||
113 | static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, | 113 | static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, |
114 | uint64_t size, u32 domain) | 114 | uint64_t size, u32 domain, bool sg) |
115 | { | 115 | { |
116 | size_t acc_size; | 116 | size_t acc_size, system_mem_needed, ttm_mem_needed; |
117 | int ret = 0; | 117 | int ret = 0; |
118 | 118 | ||
119 | acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, | 119 | acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, |
120 | sizeof(struct amdgpu_bo)); | 120 | sizeof(struct amdgpu_bo)); |
121 | 121 | ||
122 | spin_lock(&kfd_mem_limit.mem_limit_lock); | 122 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
123 | |||
123 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { | 124 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { |
124 | if (kfd_mem_limit.system_mem_used + (acc_size + size) > | 125 | /* TTM GTT memory */ |
125 | kfd_mem_limit.max_system_mem_limit) { | 126 | system_mem_needed = acc_size + size; |
126 | ret = -ENOMEM; | 127 | ttm_mem_needed = acc_size + size; |
127 | goto err_no_mem; | 128 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { |
128 | } | 129 | /* Userptr */ |
129 | kfd_mem_limit.system_mem_used += (acc_size + size); | 130 | system_mem_needed = acc_size + size; |
130 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { | 131 | ttm_mem_needed = acc_size; |
131 | if ((kfd_mem_limit.system_mem_used + acc_size > | 132 | } else { |
132 | kfd_mem_limit.max_system_mem_limit) || | 133 | /* VRAM and SG */ |
133 | (kfd_mem_limit.userptr_mem_used + (size + acc_size) > | 134 | system_mem_needed = acc_size; |
134 | kfd_mem_limit.max_userptr_mem_limit)) { | 135 | ttm_mem_needed = acc_size; |
135 | ret = -ENOMEM; | 136 | } |
136 | goto err_no_mem; | 137 | |
137 | } | 138 | if ((kfd_mem_limit.system_mem_used + system_mem_needed > |
138 | kfd_mem_limit.system_mem_used += acc_size; | 139 | kfd_mem_limit.max_system_mem_limit) || |
139 | kfd_mem_limit.userptr_mem_used += size; | 140 | (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > |
141 | kfd_mem_limit.max_ttm_mem_limit)) | ||
142 | ret = -ENOMEM; | ||
143 | else { | ||
144 | kfd_mem_limit.system_mem_used += system_mem_needed; | ||
145 | kfd_mem_limit.ttm_mem_used += ttm_mem_needed; | ||
140 | } | 146 | } |
141 | err_no_mem: | 147 | |
142 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 148 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
143 | return ret; | 149 | return ret; |
144 | } | 150 | } |
145 | 151 | ||
146 | static void unreserve_system_mem_limit(struct amdgpu_device *adev, | 152 | static void unreserve_system_mem_limit(struct amdgpu_device *adev, |
147 | uint64_t size, u32 domain) | 153 | uint64_t size, u32 domain, bool sg) |
148 | { | 154 | { |
149 | size_t acc_size; | 155 | size_t acc_size; |
150 | 156 | ||
@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, | |||
154 | spin_lock(&kfd_mem_limit.mem_limit_lock); | 160 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
155 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { | 161 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { |
156 | kfd_mem_limit.system_mem_used -= (acc_size + size); | 162 | kfd_mem_limit.system_mem_used -= (acc_size + size); |
157 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { | 163 | kfd_mem_limit.ttm_mem_used -= (acc_size + size); |
164 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { | ||
165 | kfd_mem_limit.system_mem_used -= (acc_size + size); | ||
166 | kfd_mem_limit.ttm_mem_used -= acc_size; | ||
167 | } else { | ||
158 | kfd_mem_limit.system_mem_used -= acc_size; | 168 | kfd_mem_limit.system_mem_used -= acc_size; |
159 | kfd_mem_limit.userptr_mem_used -= size; | 169 | kfd_mem_limit.ttm_mem_used -= acc_size; |
160 | } | 170 | } |
161 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | 171 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, |
162 | "kfd system memory accounting unbalanced"); | 172 | "kfd system memory accounting unbalanced"); |
163 | WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, | 173 | WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, |
164 | "kfd userptr memory accounting unbalanced"); | 174 | "kfd TTM memory accounting unbalanced"); |
165 | 175 | ||
166 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 176 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
167 | } | 177 | } |
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) | |||
171 | spin_lock(&kfd_mem_limit.mem_limit_lock); | 181 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
172 | 182 | ||
173 | if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { | 183 | if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { |
174 | kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; | 184 | kfd_mem_limit.system_mem_used -= |
175 | kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); | 185 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); |
186 | kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; | ||
176 | } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { | 187 | } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { |
177 | kfd_mem_limit.system_mem_used -= | 188 | kfd_mem_limit.system_mem_used -= |
178 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); | 189 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); |
190 | kfd_mem_limit.ttm_mem_used -= | ||
191 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); | ||
192 | } else { | ||
193 | kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; | ||
194 | kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; | ||
179 | } | 195 | } |
180 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | 196 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, |
181 | "kfd system memory accounting unbalanced"); | 197 | "kfd system memory accounting unbalanced"); |
182 | WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, | 198 | WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, |
183 | "kfd userptr memory accounting unbalanced"); | 199 | "kfd TTM memory accounting unbalanced"); |
184 | 200 | ||
185 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 201 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
186 | } | 202 | } |
@@ -1219,10 +1235,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
1219 | 1235 | ||
1220 | amdgpu_sync_create(&(*mem)->sync); | 1236 | amdgpu_sync_create(&(*mem)->sync); |
1221 | 1237 | ||
1222 | ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); | 1238 | ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, |
1239 | alloc_domain, false); | ||
1223 | if (ret) { | 1240 | if (ret) { |
1224 | pr_debug("Insufficient system memory\n"); | 1241 | pr_debug("Insufficient system memory\n"); |
1225 | goto err_reserve_system_mem; | 1242 | goto err_reserve_limit; |
1226 | } | 1243 | } |
1227 | 1244 | ||
1228 | pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", | 1245 | pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", |
@@ -1270,10 +1287,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
1270 | allocate_init_user_pages_failed: | 1287 | allocate_init_user_pages_failed: |
1271 | amdgpu_bo_unref(&bo); | 1288 | amdgpu_bo_unref(&bo); |
1272 | /* Don't unreserve system mem limit twice */ | 1289 | /* Don't unreserve system mem limit twice */ |
1273 | goto err_reserve_system_mem; | 1290 | goto err_reserve_limit; |
1274 | err_bo_create: | 1291 | err_bo_create: |
1275 | unreserve_system_mem_limit(adev, size, alloc_domain); | 1292 | unreserve_system_mem_limit(adev, size, alloc_domain, false); |
1276 | err_reserve_system_mem: | 1293 | err_reserve_limit: |
1277 | mutex_destroy(&(*mem)->lock); | 1294 | mutex_destroy(&(*mem)->lock); |
1278 | kfree(*mem); | 1295 | kfree(*mem); |
1279 | return ret; | 1296 | return ret; |