aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
authorEric Huang <JinhuiEric.Huang@amd.com>2018-09-05 11:46:14 -0400
committerAlex Deucher <alexander.deucher@amd.com>2018-11-20 14:01:54 -0500
commit5d240da93edc29adb68320c5e475dc9c7fcad5dd (patch)
tree44551f46cc7a22198a922e187b8e28ceb16bf1a7 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parent0f04e538580c086046e3b31cbc7ef4307a5b771d (diff)
drm/amdkfd: change system memory overcommit limit
This improves the system memory limit by: 1. replacing the userptr limit with a total memory limit that counts TTM memory usage and userptr usage. 2. counting acc size for all BOs. Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c99
1 files changed, 58 insertions, 41 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 84e4c1e4d109..f3129b912714 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -46,9 +46,9 @@
46/* Impose limit on how much memory KFD can use */ 46/* Impose limit on how much memory KFD can use */
47static struct { 47static struct {
48 uint64_t max_system_mem_limit; 48 uint64_t max_system_mem_limit;
49 uint64_t max_userptr_mem_limit; 49 uint64_t max_ttm_mem_limit;
50 int64_t system_mem_used; 50 int64_t system_mem_used;
51 int64_t userptr_mem_used; 51 int64_t ttm_mem_used;
52 spinlock_t mem_limit_lock; 52 spinlock_t mem_limit_lock;
53} kfd_mem_limit; 53} kfd_mem_limit;
54 54
@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
90} 90}
91 91
92/* Set memory usage limits. Current, limits are 92/* Set memory usage limits. Current, limits are
93 * System (kernel) memory - 3/8th System RAM 93 * System (TTM + userptr) memory - 3/4th System RAM
94 * Userptr memory - 3/4th System RAM 94 * TTM memory - 3/8th System RAM
95 */ 95 */
96void amdgpu_amdkfd_gpuvm_init_mem_limits(void) 96void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
97{ 97{
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
103 mem *= si.mem_unit; 103 mem *= si.mem_unit;
104 104
105 spin_lock_init(&kfd_mem_limit.mem_limit_lock); 105 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
106 kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); 106 kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
107 kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); 107 kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
108 pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", 108 pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
109 (kfd_mem_limit.max_system_mem_limit >> 20), 109 (kfd_mem_limit.max_system_mem_limit >> 20),
110 (kfd_mem_limit.max_userptr_mem_limit >> 20)); 110 (kfd_mem_limit.max_ttm_mem_limit >> 20));
111} 111}
112 112
113static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, 113static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
114 uint64_t size, u32 domain) 114 uint64_t size, u32 domain, bool sg)
115{ 115{
116 size_t acc_size; 116 size_t acc_size, system_mem_needed, ttm_mem_needed;
117 int ret = 0; 117 int ret = 0;
118 118
119 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, 119 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
120 sizeof(struct amdgpu_bo)); 120 sizeof(struct amdgpu_bo));
121 121
122 spin_lock(&kfd_mem_limit.mem_limit_lock); 122 spin_lock(&kfd_mem_limit.mem_limit_lock);
123
123 if (domain == AMDGPU_GEM_DOMAIN_GTT) { 124 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
124 if (kfd_mem_limit.system_mem_used + (acc_size + size) > 125 /* TTM GTT memory */
125 kfd_mem_limit.max_system_mem_limit) { 126 system_mem_needed = acc_size + size;
126 ret = -ENOMEM; 127 ttm_mem_needed = acc_size + size;
127 goto err_no_mem; 128 } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
128 } 129 /* Userptr */
129 kfd_mem_limit.system_mem_used += (acc_size + size); 130 system_mem_needed = acc_size + size;
130 } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { 131 ttm_mem_needed = acc_size;
131 if ((kfd_mem_limit.system_mem_used + acc_size > 132 } else {
132 kfd_mem_limit.max_system_mem_limit) || 133 /* VRAM and SG */
133 (kfd_mem_limit.userptr_mem_used + (size + acc_size) > 134 system_mem_needed = acc_size;
134 kfd_mem_limit.max_userptr_mem_limit)) { 135 ttm_mem_needed = acc_size;
135 ret = -ENOMEM; 136 }
136 goto err_no_mem; 137
137 } 138 if ((kfd_mem_limit.system_mem_used + system_mem_needed >
138 kfd_mem_limit.system_mem_used += acc_size; 139 kfd_mem_limit.max_system_mem_limit) ||
139 kfd_mem_limit.userptr_mem_used += size; 140 (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
141 kfd_mem_limit.max_ttm_mem_limit))
142 ret = -ENOMEM;
143 else {
144 kfd_mem_limit.system_mem_used += system_mem_needed;
145 kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
140 } 146 }
141err_no_mem: 147
142 spin_unlock(&kfd_mem_limit.mem_limit_lock); 148 spin_unlock(&kfd_mem_limit.mem_limit_lock);
143 return ret; 149 return ret;
144} 150}
145 151
146static void unreserve_system_mem_limit(struct amdgpu_device *adev, 152static void unreserve_system_mem_limit(struct amdgpu_device *adev,
147 uint64_t size, u32 domain) 153 uint64_t size, u32 domain, bool sg)
148{ 154{
149 size_t acc_size; 155 size_t acc_size;
150 156
@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
154 spin_lock(&kfd_mem_limit.mem_limit_lock); 160 spin_lock(&kfd_mem_limit.mem_limit_lock);
155 if (domain == AMDGPU_GEM_DOMAIN_GTT) { 161 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
156 kfd_mem_limit.system_mem_used -= (acc_size + size); 162 kfd_mem_limit.system_mem_used -= (acc_size + size);
157 } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { 163 kfd_mem_limit.ttm_mem_used -= (acc_size + size);
164 } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
165 kfd_mem_limit.system_mem_used -= (acc_size + size);
166 kfd_mem_limit.ttm_mem_used -= acc_size;
167 } else {
158 kfd_mem_limit.system_mem_used -= acc_size; 168 kfd_mem_limit.system_mem_used -= acc_size;
159 kfd_mem_limit.userptr_mem_used -= size; 169 kfd_mem_limit.ttm_mem_used -= acc_size;
160 } 170 }
161 WARN_ONCE(kfd_mem_limit.system_mem_used < 0, 171 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
162 "kfd system memory accounting unbalanced"); 172 "kfd system memory accounting unbalanced");
163 WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, 173 WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
164 "kfd userptr memory accounting unbalanced"); 174 "kfd TTM memory accounting unbalanced");
165 175
166 spin_unlock(&kfd_mem_limit.mem_limit_lock); 176 spin_unlock(&kfd_mem_limit.mem_limit_lock);
167} 177}
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
171 spin_lock(&kfd_mem_limit.mem_limit_lock); 181 spin_lock(&kfd_mem_limit.mem_limit_lock);
172 182
173 if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { 183 if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
174 kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; 184 kfd_mem_limit.system_mem_used -=
175 kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); 185 (bo->tbo.acc_size + amdgpu_bo_size(bo));
186 kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
176 } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { 187 } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
177 kfd_mem_limit.system_mem_used -= 188 kfd_mem_limit.system_mem_used -=
178 (bo->tbo.acc_size + amdgpu_bo_size(bo)); 189 (bo->tbo.acc_size + amdgpu_bo_size(bo));
190 kfd_mem_limit.ttm_mem_used -=
191 (bo->tbo.acc_size + amdgpu_bo_size(bo));
192 } else {
193 kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
194 kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
179 } 195 }
180 WARN_ONCE(kfd_mem_limit.system_mem_used < 0, 196 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
181 "kfd system memory accounting unbalanced"); 197 "kfd system memory accounting unbalanced");
182 WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, 198 WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
183 "kfd userptr memory accounting unbalanced"); 199 "kfd TTM memory accounting unbalanced");
184 200
185 spin_unlock(&kfd_mem_limit.mem_limit_lock); 201 spin_unlock(&kfd_mem_limit.mem_limit_lock);
186} 202}
@@ -1219,10 +1235,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1219 1235
1220 amdgpu_sync_create(&(*mem)->sync); 1236 amdgpu_sync_create(&(*mem)->sync);
1221 1237
1222 ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); 1238 ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
1239 alloc_domain, false);
1223 if (ret) { 1240 if (ret) {
1224 pr_debug("Insufficient system memory\n"); 1241 pr_debug("Insufficient system memory\n");
1225 goto err_reserve_system_mem; 1242 goto err_reserve_limit;
1226 } 1243 }
1227 1244
1228 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", 1245 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1270,10 +1287,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1270allocate_init_user_pages_failed: 1287allocate_init_user_pages_failed:
1271 amdgpu_bo_unref(&bo); 1288 amdgpu_bo_unref(&bo);
1272 /* Don't unreserve system mem limit twice */ 1289 /* Don't unreserve system mem limit twice */
1273 goto err_reserve_system_mem; 1290 goto err_reserve_limit;
1274err_bo_create: 1291err_bo_create:
1275 unreserve_system_mem_limit(adev, size, alloc_domain); 1292 unreserve_system_mem_limit(adev, size, alloc_domain, false);
1276err_reserve_system_mem: 1293err_reserve_limit:
1277 mutex_destroy(&(*mem)->lock); 1294 mutex_destroy(&(*mem)->lock);
1278 kfree(*mem); 1295 kfree(*mem);
1279 return ret; 1296 return ret;