diff options
author | Shawn Bohrer <sbohrer@rgmadvisors.com> | 2014-09-03 13:13:57 -0400 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2014-09-19 12:55:42 -0400 |
commit | 87773dd56d5405ac28119fcfadacefd35877c18f (patch) | |
tree | b17f642eebccabe30740f03ccc6e6c9e26fd8948 /drivers/infiniband | |
parent | 52addcf9d6669fa439387610bc65c92fa0980cef (diff) |
IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get
In debugging an application that receives -ENOMEM from ib_reg_mr(), I
found that ib_umem_get() can fail because the pinned_vm count has
wrapped causing it to always be larger than the lock limit even with
RLIMIT_MEMLOCK set to RLIM_INFINITY.
The wrapping of pinned_vm occurs because the process that calls
ib_reg_mr() will have its mm->pinned_vm count incremented. Later a
different process with a different mm_struct than the one that
allocated the ib_umem struct ends up releasing it which results in
decrementing the new processes mm->pinned_vm count past zero and
wrapping.
I'm not entirely sure what circumstances cause a different process to
release the ib_umem than the one that allocated it but the kernel
stack trace of the freeing process from my situation looks like the
following:
Call Trace:
[<ffffffff814d64b1>] dump_stack+0x19/0x1b
[<ffffffffa0b522a5>] ib_umem_release+0x1f5/0x200 [ib_core]
[<ffffffffa0b90681>] mlx4_ib_destroy_qp+0x241/0x440 [mlx4_ib]
[<ffffffffa0b4d93c>] ib_destroy_qp+0x12c/0x170 [ib_core]
[<ffffffffa0cc7129>] ib_uverbs_close+0x259/0x4e0 [ib_uverbs]
[<ffffffff81141cba>] __fput+0xba/0x240
[<ffffffff81141e4e>] ____fput+0xe/0x10
[<ffffffff81060894>] task_work_run+0xc4/0xe0
[<ffffffff810029e5>] do_notify_resume+0x95/0xa0
[<ffffffff814e3dd0>] int_signal+0x12/0x17
The following patch fixes the issue by storing the pid struct of the
process that calls ib_umem_get() so that ib_umem_release and/or
ib_umem_account() can properly decrement the pinned_vm count of the
correct mm_struct.
Signed-off-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Reviewed-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/core/umem.c | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a3a2e9c1639b..df0c4f605a21 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c | |||
@@ -105,6 +105,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, | |||
105 | umem->length = size; | 105 | umem->length = size; |
106 | umem->offset = addr & ~PAGE_MASK; | 106 | umem->offset = addr & ~PAGE_MASK; |
107 | umem->page_size = PAGE_SIZE; | 107 | umem->page_size = PAGE_SIZE; |
108 | umem->pid = get_task_pid(current, PIDTYPE_PID); | ||
108 | /* | 109 | /* |
109 | * We ask for writable memory if any access flags other than | 110 | * We ask for writable memory if any access flags other than |
110 | * "remote read" are set. "Local write" and "remote write" | 111 | * "remote read" are set. "Local write" and "remote write" |
@@ -198,6 +199,7 @@ out: | |||
198 | if (ret < 0) { | 199 | if (ret < 0) { |
199 | if (need_release) | 200 | if (need_release) |
200 | __ib_umem_release(context->device, umem, 0); | 201 | __ib_umem_release(context->device, umem, 0); |
202 | put_pid(umem->pid); | ||
201 | kfree(umem); | 203 | kfree(umem); |
202 | } else | 204 | } else |
203 | current->mm->pinned_vm = locked; | 205 | current->mm->pinned_vm = locked; |
@@ -230,15 +232,19 @@ void ib_umem_release(struct ib_umem *umem) | |||
230 | { | 232 | { |
231 | struct ib_ucontext *context = umem->context; | 233 | struct ib_ucontext *context = umem->context; |
232 | struct mm_struct *mm; | 234 | struct mm_struct *mm; |
235 | struct task_struct *task; | ||
233 | unsigned long diff; | 236 | unsigned long diff; |
234 | 237 | ||
235 | __ib_umem_release(umem->context->device, umem, 1); | 238 | __ib_umem_release(umem->context->device, umem, 1); |
236 | 239 | ||
237 | mm = get_task_mm(current); | 240 | task = get_pid_task(umem->pid, PIDTYPE_PID); |
238 | if (!mm) { | 241 | put_pid(umem->pid); |
239 | kfree(umem); | 242 | if (!task) |
240 | return; | 243 | goto out; |
241 | } | 244 | mm = get_task_mm(task); |
245 | put_task_struct(task); | ||
246 | if (!mm) | ||
247 | goto out; | ||
242 | 248 | ||
243 | diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; | 249 | diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; |
244 | 250 | ||
@@ -262,9 +268,10 @@ void ib_umem_release(struct ib_umem *umem) | |||
262 | } else | 268 | } else |
263 | down_write(&mm->mmap_sem); | 269 | down_write(&mm->mmap_sem); |
264 | 270 | ||
265 | current->mm->pinned_vm -= diff; | 271 | mm->pinned_vm -= diff; |
266 | up_write(&mm->mmap_sem); | 272 | up_write(&mm->mmap_sem); |
267 | mmput(mm); | 273 | mmput(mm); |
274 | out: | ||
268 | kfree(umem); | 275 | kfree(umem); |
269 | } | 276 | } |
270 | EXPORT_SYMBOL(ib_umem_release); | 277 | EXPORT_SYMBOL(ib_umem_release); |