author     Davidlohr Bueso <dave@stgolabs.net>    2019-02-06 12:59:15 -0500
committer  Jason Gunthorpe <jgg@mellanox.com>     2019-02-07 14:54:02 -0500
commit     70f8a3ca68d3e1f3344d959981ca55d5f6ec77f7
tree       5c43234a8f1697bf3d1a4a8926a014b922b046ff
parent     a2bfd708b17adb6e597e70d4eca824667f2d4e3c
mm: make mm->pinned_vm an atomic64 counter
Taking a sleeping lock to _only_ increment a variable is quite the
overkill, and pretty much all users do this. Furthermore, some drivers
(e.g. infiniband and scif) that need pinned semantics go to quite some
trouble to delay the (un)accounting of pinned pages via a workqueue
when the lock cannot be acquired.

By making the counter atomic we no longer need to hold the mmap_sem
and can simplify some code around it for pinned_vm users. The counter
is 64-bit so that we need not worry about overflows, such as when the
page count is controlled by userspace input (e.g. rdma).

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
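To make the pattern concrete, here is a minimal before/after sketch of
the accounting this patch converts (illustrative only; mm and npages
stand in for whatever each caller actually uses):

	/* before: a writer lock on mmap_sem just to bump a counter */
	down_write(&mm->mmap_sem);
	mm->pinned_vm += npages;
	up_write(&mm->mmap_sem);

	/* after: the atomic64 counter needs no mmap_sem for the update */
	atomic64_add(npages, &mm->pinned_vm);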
-rw-r--r--  drivers/infiniband/core/umem.c              | 12
-rw-r--r--  drivers/infiniband/hw/hfi1/user_pages.c     |  6
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_pages.c  |  4
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c    |  8
-rw-r--r--  drivers/misc/mic/scif/scif_rma.c            |  6
-rw-r--r--  fs/proc/task_mmu.c                          |  2
-rw-r--r--  include/linux/mm_types.h                    |  2
-rw-r--r--  kernel/events/core.c                        |  8
-rw-r--r--  kernel/fork.c                               |  2
-rw-r--r--  mm/debug.c                                  |  5
10 files changed, 28 insertions(+), 27 deletions(-)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 1efe0a74e06b..678abe1afcba 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -166,13 +166,13 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	down_write(&mm->mmap_sem);
-	if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
-	    (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
+	new_pinned = atomic64_read(&mm->pinned_vm) + npages;
+	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
 		up_write(&mm->mmap_sem);
 		ret = -ENOMEM;
 		goto out;
 	}
-	mm->pinned_vm = new_pinned;
+	atomic64_set(&mm->pinned_vm, new_pinned);
 	up_write(&mm->mmap_sem);
 
 	cur_base = addr & PAGE_MASK;
@@ -234,7 +234,7 @@ umem_release:
 	__ib_umem_release(context->device, umem, 0);
 vma:
 	down_write(&mm->mmap_sem);
-	mm->pinned_vm -= ib_umem_num_pages(umem);
+	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
 	up_write(&mm->mmap_sem);
 out:
 	if (vma_list)
@@ -263,7 +263,7 @@ static void ib_umem_release_defer(struct work_struct *work)
 	struct ib_umem *umem = container_of(work, struct ib_umem, work);
 
 	down_write(&umem->owning_mm->mmap_sem);
-	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
 	up_write(&umem->owning_mm->mmap_sem);
 
 	__ib_umem_release_tail(umem);
@@ -302,7 +302,7 @@ void ib_umem_release(struct ib_umem *umem)
 	} else {
 		down_write(&umem->owning_mm->mmap_sem);
 	}
-	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
 	up_write(&umem->owning_mm->mmap_sem);
 
 	__ib_umem_release_tail(umem);
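Note that ib_umem_get() above still takes mmap_sem around its limit
check: the read, compare, and set are separate steps, and the lock is
what keeps two concurrent pinners from both passing the check. A fully
lock-free variant (a sketch of one possible follow-up, not part of this
commit) could add first and back out on failure:

	/* hypothetical: optimistic add, roll back if over the limit */
	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
		atomic64_sub(npages, &mm->pinned_vm);
		ret = -ENOMEM;
		goto out;
	}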
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index e341e6dcc388..40a6e434190f 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -92,7 +92,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
 	size = DIV_ROUND_UP(size, PAGE_SIZE);
 
 	down_read(&mm->mmap_sem);
-	pinned = mm->pinned_vm;
+	pinned = atomic64_read(&mm->pinned_vm);
 	up_read(&mm->mmap_sem);
 
 	/* First, check the absolute limit against all pinned pages. */
@@ -112,7 +112,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 		return ret;
 
 	down_write(&mm->mmap_sem);
-	mm->pinned_vm += ret;
+	atomic64_add(ret, &mm->pinned_vm);
 	up_write(&mm->mmap_sem);
 
 	return ret;
@@ -131,7 +131,7 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 
 	if (mm) { /* during close after signal, mm can be NULL */
 		down_write(&mm->mmap_sem);
-		mm->pinned_vm -= npages;
+		atomic64_sub(npages, &mm->pinned_vm);
 		up_write(&mm->mmap_sem);
 	}
 }
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 075f09fb7ce3..c6c81022d313 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -75,7 +75,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
 			goto bail_release;
 	}
 
-	current->mm->pinned_vm += num_pages;
+	atomic64_add(num_pages, &current->mm->pinned_vm);
 
 	ret = 0;
 	goto bail;
@@ -156,7 +156,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
 	__qib_release_user_pages(p, num_pages, 1);
 
 	if (current->mm) {
-		current->mm->pinned_vm -= num_pages;
+		atomic64_sub(num_pages, &current->mm->pinned_vm);
 		up_write(&current->mm->mmap_sem);
 	}
 }
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index ce01a59fccc4..854436a2b437 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -129,7 +129,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	uiomr->owning_mm = mm = current->mm;
 	down_write(&mm->mmap_sem);
 
-	locked = npages + current->mm->pinned_vm;
+	locked = npages + atomic64_read(&current->mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -187,7 +187,7 @@ out:
 	if (ret < 0)
 		usnic_uiom_put_pages(chunk_list, 0);
 	else {
-		mm->pinned_vm = locked;
+		atomic64_set(&mm->pinned_vm, locked);
 		mmgrab(uiomr->owning_mm);
 	}
 
@@ -441,7 +441,7 @@ static void usnic_uiom_release_defer(struct work_struct *work)
 		container_of(work, struct usnic_uiom_reg, work);
 
 	down_write(&uiomr->owning_mm->mmap_sem);
-	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+	atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
 	up_write(&uiomr->owning_mm->mmap_sem);
 
 	__usnic_uiom_release_tail(uiomr);
@@ -469,7 +469,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
 	} else {
 		down_write(&uiomr->owning_mm->mmap_sem);
 	}
-	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+	atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
 	up_write(&uiomr->owning_mm->mmap_sem);
 
 	__usnic_uiom_release_tail(uiomr);
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 749321eb91ae..2448368f181e 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -285,7 +285,7 @@ __scif_dec_pinned_vm_lock(struct mm_struct *mm,
 	} else {
 		down_write(&mm->mmap_sem);
 	}
-	mm->pinned_vm -= nr_pages;
+	atomic64_sub(nr_pages, &mm->pinned_vm);
 	up_write(&mm->mmap_sem);
 	return 0;
 }
@@ -299,7 +299,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 		return 0;
 
 	locked = nr_pages;
-	locked += mm->pinned_vm;
+	locked += atomic64_read(&mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		dev_err(scif_info.mdev.this_device,
@@ -307,7 +307,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 			locked, lock_limit);
 		return -ENOMEM;
 	}
-	mm->pinned_vm = locked;
+	atomic64_set(&mm->pinned_vm, locked);
 	return 0;
 }
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0ec9edab2f3..d2902962244d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
 	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
 	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
-	SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+	SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
 	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
 	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
 	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2c471a2c43fa..acea2ea2d6c4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -405,7 +405,7 @@ struct mm_struct {
 
 	unsigned long total_vm;	   /* Total pages mapped */
 	unsigned long locked_vm;   /* Pages that have PG_mlocked set */
-	unsigned long pinned_vm;   /* Refcount permanently increased */
+	atomic64_t    pinned_vm;   /* Refcount permanently increased */
 	unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
 	unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
 	unsigned long stack_vm;	   /* VM_STACK */
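With the field now an atomic64_t, plain loads and stores of pinned_vm
no longer compile; every access must go through the atomic64 accessors.
The ones this patch relies on are, roughly (see <linux/atomic.h>):

	long long atomic64_read(const atomic64_t *v);   /* load   */
	void atomic64_set(atomic64_t *v, long long i);  /* store  */
	void atomic64_add(long long a, atomic64_t *v);  /* v += a */
	void atomic64_sub(long long a, atomic64_t *v);  /* v -= a */

On 32-bit architectures without native 64-bit atomics these fall back to
the generic spinlock-backed implementation in lib/atomic64.c, which is
still far cheaper than taking mmap_sem.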
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e5ede6918050..29e9f2473656 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5459,7 +5459,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	/* now it's safe to free the pages */
 	atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
-	vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+	atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
 
 	/* this has to be the last one */
 	rb_free_aux(rb);
@@ -5532,7 +5532,7 @@ again:
 	 */
 
 	atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
-	vma->vm_mm->pinned_vm -= mmap_locked;
+	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
 	free_uid(mmap_user);
 
 out_put:
@@ -5680,7 +5680,7 @@ accounting:
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
-	locked = vma->vm_mm->pinned_vm + extra;
+	locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
 
 	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
 	    !capable(CAP_IPC_LOCK)) {
@@ -5721,7 +5721,7 @@ accounting:
 unlock:
 	if (!ret) {
 		atomic_long_add(user_extra, &user->locked_vm);
-		vma->vm_mm->pinned_vm += extra;
+		atomic64_add(extra, &vma->vm_mm->pinned_vm);
 
 		atomic_inc(&event->mmap_count);
 	} else if (rb) {
diff --git a/kernel/fork.c b/kernel/fork.c
index b69248e6f0e0..85e08c379a9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -981,7 +981,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
-	mm->pinned_vm = 0;
+	atomic64_set(&mm->pinned_vm, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	spin_lock_init(&mm->arg_lock);
diff --git a/mm/debug.c b/mm/debug.c
index 0abb987dad9b..7d13941a72f9 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -135,7 +135,7 @@ void dump_mm(const struct mm_struct *mm)
135 "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" 135 "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
136 "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" 136 "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
137 "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" 137 "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
138 "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" 138 "pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
139 "start_code %lx end_code %lx start_data %lx end_data %lx\n" 139 "start_code %lx end_code %lx start_data %lx end_data %lx\n"
140 "start_brk %lx brk %lx start_stack %lx\n" 140 "start_brk %lx brk %lx start_stack %lx\n"
141 "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" 141 "arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
@@ -166,7 +166,8 @@ void dump_mm(const struct mm_struct *mm)
 		mm_pgtables_bytes(mm),
 		mm->map_count,
 		mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
-		mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
+		atomic64_read(&mm->pinned_vm),
+		mm->data_vm, mm->exec_vm, mm->stack_vm,
 		mm->start_code, mm->end_code, mm->start_data, mm->end_data,
 		mm->start_brk, mm->brk, mm->start_stack,
 		mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,