aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christoph Lameter <cl@linux.com> 2011-10-31 20:07:30 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-10-31 20:30:46 -0400
commit: bc3e53f682d93df677dbd5006a404722b3adfe18 (patch)
tree: f386c29f13626e2b7d98d5a52525a78a9b59e447
parent: f11c0ca501af89fc07b0d9f17531ba3b68a4ef39 (diff)
mm: distinguish between mlocked and pinned pages
Some kernel components pin user space memory (infiniband and perf) (by increasing the page count) and account that memory as "mlocked". The difference between mlocking and pinning is: A. mlocked pages are marked with PG_mlocked and are exempt from swapping. Page migration may move them around though. They are kept on a special LRU list. B. Pinned pages cannot be moved because something needs to directly access physical memory. They may not be on any LRU list. I recently saw an mlockalled process where mm->locked_vm became bigger than the virtual size of the process (!) because some memory was accounted for twice: Once when the page was mlocked and once when the Infiniband layer increased the refcount because it needed to pin the RDMA memory. This patch introduces a separate counter for pinned pages and accounts them separately. Signed-off-by: Christoph Lameter <cl@linux.com> Cc: Mike Marciniszyn <infinipath@qlogic.com> Cc: Roland Dreier <roland@kernel.org> Cc: Sean Hefty <sean.hefty@intel.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/infiniband/core/umem.c 6
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_user_pages.c 6
-rw-r--r-- drivers/infiniband/hw/qib/qib_user_pages.c 4
-rw-r--r-- fs/proc/task_mmu.c 2
-rw-r--r-- include/linux/mm_types.h 2
-rw-r--r-- kernel/events/core.c 6
6 files changed, 14 insertions, 12 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b645e558876f..9155f91d66bf 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -136,7 +136,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
136 136
137 down_write(&current->mm->mmap_sem); 137 down_write(&current->mm->mmap_sem);
138 138
139 locked = npages + current->mm->locked_vm; 139 locked = npages + current->mm->pinned_vm;
140 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 140 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
141 141
142 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 142 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -206,7 +206,7 @@ out:
206 __ib_umem_release(context->device, umem, 0); 206 __ib_umem_release(context->device, umem, 0);
207 kfree(umem); 207 kfree(umem);
208 } else 208 } else
209 current->mm->locked_vm = locked; 209 current->mm->pinned_vm = locked;
210 210
211 up_write(&current->mm->mmap_sem); 211 up_write(&current->mm->mmap_sem);
212 if (vma_list) 212 if (vma_list)
@@ -222,7 +222,7 @@ static void ib_umem_account(struct work_struct *work)
222 struct ib_umem *umem = container_of(work, struct ib_umem, work); 222 struct ib_umem *umem = container_of(work, struct ib_umem, work);
223 223
224 down_write(&umem->mm->mmap_sem); 224 down_write(&umem->mm->mmap_sem);
225 umem->mm->locked_vm -= umem->diff; 225 umem->mm->pinned_vm -= umem->diff;
226 up_write(&umem->mm->mmap_sem); 226 up_write(&umem->mm->mmap_sem);
227 mmput(umem->mm); 227 mmput(umem->mm);
228 kfree(umem); 228 kfree(umem);
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index cfed5399f074..dc66c4506916 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -79,7 +79,7 @@ static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
79 goto bail_release; 79 goto bail_release;
80 } 80 }
81 81
82 current->mm->locked_vm += num_pages; 82 current->mm->pinned_vm += num_pages;
83 83
84 ret = 0; 84 ret = 0;
85 goto bail; 85 goto bail;
@@ -178,7 +178,7 @@ void ipath_release_user_pages(struct page **p, size_t num_pages)
178 178
179 __ipath_release_user_pages(p, num_pages, 1); 179 __ipath_release_user_pages(p, num_pages, 1);
180 180
181 current->mm->locked_vm -= num_pages; 181 current->mm->pinned_vm -= num_pages;
182 182
183 up_write(&current->mm->mmap_sem); 183 up_write(&current->mm->mmap_sem);
184} 184}
@@ -195,7 +195,7 @@ static void user_pages_account(struct work_struct *_work)
195 container_of(_work, struct ipath_user_pages_work, work); 195 container_of(_work, struct ipath_user_pages_work, work);
196 196
197 down_write(&work->mm->mmap_sem); 197 down_write(&work->mm->mmap_sem);
198 work->mm->locked_vm -= work->num_pages; 198 work->mm->pinned_vm -= work->num_pages;
199 up_write(&work->mm->mmap_sem); 199 up_write(&work->mm->mmap_sem);
200 mmput(work->mm); 200 mmput(work->mm);
201 kfree(work); 201 kfree(work);
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 7689e49c13c9..2bc1d2b96298 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -74,7 +74,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
74 goto bail_release; 74 goto bail_release;
75 } 75 }
76 76
77 current->mm->locked_vm += num_pages; 77 current->mm->pinned_vm += num_pages;
78 78
79 ret = 0; 79 ret = 0;
80 goto bail; 80 goto bail;
@@ -151,7 +151,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
151 __qib_release_user_pages(p, num_pages, 1); 151 __qib_release_user_pages(p, num_pages, 1);
152 152
153 if (current->mm) { 153 if (current->mm) {
154 current->mm->locked_vm -= num_pages; 154 current->mm->pinned_vm -= num_pages;
155 up_write(&current->mm->mmap_sem); 155 up_write(&current->mm->mmap_sem);
156 } 156 }
157} 157}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c7d4ee663f14..e418c5abdb0e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -44,6 +44,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
44 "VmPeak:\t%8lu kB\n" 44 "VmPeak:\t%8lu kB\n"
45 "VmSize:\t%8lu kB\n" 45 "VmSize:\t%8lu kB\n"
46 "VmLck:\t%8lu kB\n" 46 "VmLck:\t%8lu kB\n"
47 "VmPin:\t%8lu kB\n"
47 "VmHWM:\t%8lu kB\n" 48 "VmHWM:\t%8lu kB\n"
48 "VmRSS:\t%8lu kB\n" 49 "VmRSS:\t%8lu kB\n"
49 "VmData:\t%8lu kB\n" 50 "VmData:\t%8lu kB\n"
@@ -55,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
55 hiwater_vm << (PAGE_SHIFT-10), 56 hiwater_vm << (PAGE_SHIFT-10),
56 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), 57 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
57 mm->locked_vm << (PAGE_SHIFT-10), 58 mm->locked_vm << (PAGE_SHIFT-10),
59 mm->pinned_vm << (PAGE_SHIFT-10),
58 hiwater_rss << (PAGE_SHIFT-10), 60 hiwater_rss << (PAGE_SHIFT-10),
59 total_rss << (PAGE_SHIFT-10), 61 total_rss << (PAGE_SHIFT-10),
60 data << (PAGE_SHIFT-10), 62 data << (PAGE_SHIFT-10),
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6456624aa964..f3175830cc73 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -304,7 +304,7 @@ struct mm_struct {
304 unsigned long hiwater_rss; /* High-watermark of RSS usage */ 304 unsigned long hiwater_rss; /* High-watermark of RSS usage */
305 unsigned long hiwater_vm; /* High-water virtual memory usage */ 305 unsigned long hiwater_vm; /* High-water virtual memory usage */
306 306
307 unsigned long total_vm, locked_vm, shared_vm, exec_vm; 307 unsigned long total_vm, locked_vm, pinned_vm, shared_vm, exec_vm;
308 unsigned long stack_vm, reserved_vm, def_flags, nr_ptes; 308 unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
309 unsigned long start_code, end_code, start_data, end_data; 309 unsigned long start_code, end_code, start_data, end_data;
310 unsigned long start_brk, brk, start_stack; 310 unsigned long start_brk, brk, start_stack;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d1a1bee35228..12a0287e0358 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3544,7 +3544,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
3544 struct ring_buffer *rb = event->rb; 3544 struct ring_buffer *rb = event->rb;
3545 3545
3546 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 3546 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
3547 vma->vm_mm->locked_vm -= event->mmap_locked; 3547 vma->vm_mm->pinned_vm -= event->mmap_locked;
3548 rcu_assign_pointer(event->rb, NULL); 3548 rcu_assign_pointer(event->rb, NULL);
3549 mutex_unlock(&event->mmap_mutex); 3549 mutex_unlock(&event->mmap_mutex);
3550 3550
@@ -3625,7 +3625,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3625 3625
3626 lock_limit = rlimit(RLIMIT_MEMLOCK); 3626 lock_limit = rlimit(RLIMIT_MEMLOCK);
3627 lock_limit >>= PAGE_SHIFT; 3627 lock_limit >>= PAGE_SHIFT;
3628 locked = vma->vm_mm->locked_vm + extra; 3628 locked = vma->vm_mm->pinned_vm + extra;
3629 3629
3630 if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && 3630 if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
3631 !capable(CAP_IPC_LOCK)) { 3631 !capable(CAP_IPC_LOCK)) {
@@ -3651,7 +3651,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3651 atomic_long_add(user_extra, &user->locked_vm); 3651 atomic_long_add(user_extra, &user->locked_vm);
3652 event->mmap_locked = extra; 3652 event->mmap_locked = extra;
3653 event->mmap_user = get_current_user(); 3653 event->mmap_user = get_current_user();
3654 vma->vm_mm->locked_vm += event->mmap_locked; 3654 vma->vm_mm->pinned_vm += event->mmap_locked;
3655 3655
3656unlock: 3656unlock:
3657 if (!ret) 3657 if (!ret)