diff options
-rw-r--r-- | fs/proc/task_mmu.c | 417 |
1 files changed, 210 insertions, 207 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fcdbd233f252..308fc5451e43 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -114,36 +114,122 @@ static void pad_len_spaces(struct seq_file *m, int len) | |||
114 | seq_printf(m, "%*c", len, ' '); | 114 | seq_printf(m, "%*c", len, ' '); |
115 | } | 115 | } |
116 | 116 | ||
117 | /* | 117 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) |
118 | * Proportional Set Size(PSS): my share of RSS. | 118 | { |
119 | * | 119 | if (vma && vma != priv->tail_vma) { |
120 | * PSS of a process is the count of pages it has in memory, where each | 120 | struct mm_struct *mm = vma->vm_mm; |
121 | * page is divided by the number of processes sharing it. So if a | 121 | up_read(&mm->mmap_sem); |
122 | * process has 1000 pages all to itself, and 1000 shared with one other | 122 | mmput(mm); |
123 | * process, its PSS will be 1500. | 123 | } |
124 | * | 124 | } |
125 | * To keep (accumulated) division errors low, we adopt a 64bit | ||
126 | * fixed-point pss counter to minimize division errors. So (pss >> | ||
127 | * PSS_SHIFT) would be the real byte count. | ||
128 | * | ||
129 | * A shift of 12 before division means (assuming 4K page size): | ||
130 | * - 1M 3-user-pages add up to 8KB errors; | ||
131 | * - supports mapcount up to 2^24, or 16M; | ||
132 | * - supports PSS up to 2^52 bytes, or 4PB. | ||
133 | */ | ||
134 | #define PSS_SHIFT 12 | ||
135 | 125 | ||
136 | struct mem_size_stats | 126 | static void *m_start(struct seq_file *m, loff_t *pos) |
137 | { | 127 | { |
138 | struct vm_area_struct *vma; | 128 | struct proc_maps_private *priv = m->private; |
139 | unsigned long resident; | 129 | unsigned long last_addr = m->version; |
140 | unsigned long shared_clean; | 130 | struct mm_struct *mm; |
141 | unsigned long shared_dirty; | 131 | struct vm_area_struct *vma, *tail_vma = NULL; |
142 | unsigned long private_clean; | 132 | loff_t l = *pos; |
143 | unsigned long private_dirty; | 133 | |
144 | unsigned long referenced; | 134 | /* Clear the per syscall fields in priv */ |
145 | u64 pss; | 135 | priv->task = NULL; |
146 | }; | 136 | priv->tail_vma = NULL; |
137 | |||
138 | /* | ||
139 | * We remember last_addr rather than next_addr to hit with | ||
140 | * mmap_cache most of the time. We have zero last_addr at | ||
141 | * the beginning and also after lseek. We will have -1 last_addr | ||
142 | * after the end of the vmas. | ||
143 | */ | ||
144 | |||
145 | if (last_addr == -1UL) | ||
146 | return NULL; | ||
147 | |||
148 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | ||
149 | if (!priv->task) | ||
150 | return NULL; | ||
151 | |||
152 | mm = mm_for_maps(priv->task); | ||
153 | if (!mm) | ||
154 | return NULL; | ||
155 | |||
156 | tail_vma = get_gate_vma(priv->task); | ||
157 | priv->tail_vma = tail_vma; | ||
158 | |||
159 | /* Start with last addr hint */ | ||
160 | vma = find_vma(mm, last_addr); | ||
161 | if (last_addr && vma) { | ||
162 | vma = vma->vm_next; | ||
163 | goto out; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * Check the vma index is within the range and do | ||
168 | * sequential scan until m_index. | ||
169 | */ | ||
170 | vma = NULL; | ||
171 | if ((unsigned long)l < mm->map_count) { | ||
172 | vma = mm->mmap; | ||
173 | while (l-- && vma) | ||
174 | vma = vma->vm_next; | ||
175 | goto out; | ||
176 | } | ||
177 | |||
178 | if (l != mm->map_count) | ||
179 | tail_vma = NULL; /* After gate vma */ | ||
180 | |||
181 | out: | ||
182 | if (vma) | ||
183 | return vma; | ||
184 | |||
185 | /* End of vmas has been reached */ | ||
186 | m->version = (tail_vma != NULL)? 0: -1UL; | ||
187 | up_read(&mm->mmap_sem); | ||
188 | mmput(mm); | ||
189 | return tail_vma; | ||
190 | } | ||
191 | |||
192 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | ||
193 | { | ||
194 | struct proc_maps_private *priv = m->private; | ||
195 | struct vm_area_struct *vma = v; | ||
196 | struct vm_area_struct *tail_vma = priv->tail_vma; | ||
197 | |||
198 | (*pos)++; | ||
199 | if (vma && (vma != tail_vma) && vma->vm_next) | ||
200 | return vma->vm_next; | ||
201 | vma_stop(priv, vma); | ||
202 | return (vma != tail_vma)? tail_vma: NULL; | ||
203 | } | ||
204 | |||
205 | static void m_stop(struct seq_file *m, void *v) | ||
206 | { | ||
207 | struct proc_maps_private *priv = m->private; | ||
208 | struct vm_area_struct *vma = v; | ||
209 | |||
210 | vma_stop(priv, vma); | ||
211 | if (priv->task) | ||
212 | put_task_struct(priv->task); | ||
213 | } | ||
214 | |||
215 | static int do_maps_open(struct inode *inode, struct file *file, | ||
216 | struct seq_operations *ops) | ||
217 | { | ||
218 | struct proc_maps_private *priv; | ||
219 | int ret = -ENOMEM; | ||
220 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
221 | if (priv) { | ||
222 | priv->pid = proc_pid(inode); | ||
223 | ret = seq_open(file, ops); | ||
224 | if (!ret) { | ||
225 | struct seq_file *m = file->private_data; | ||
226 | m->private = priv; | ||
227 | } else { | ||
228 | kfree(priv); | ||
229 | } | ||
230 | } | ||
231 | return ret; | ||
232 | } | ||
147 | 233 | ||
148 | static int show_map(struct seq_file *m, void *v) | 234 | static int show_map(struct seq_file *m, void *v) |
149 | { | 235 | { |
@@ -210,6 +296,56 @@ static int show_map(struct seq_file *m, void *v) | |||
210 | return 0; | 296 | return 0; |
211 | } | 297 | } |
212 | 298 | ||
299 | static struct seq_operations proc_pid_maps_op = { | ||
300 | .start = m_start, | ||
301 | .next = m_next, | ||
302 | .stop = m_stop, | ||
303 | .show = show_map | ||
304 | }; | ||
305 | |||
306 | static int maps_open(struct inode *inode, struct file *file) | ||
307 | { | ||
308 | return do_maps_open(inode, file, &proc_pid_maps_op); | ||
309 | } | ||
310 | |||
311 | const struct file_operations proc_maps_operations = { | ||
312 | .open = maps_open, | ||
313 | .read = seq_read, | ||
314 | .llseek = seq_lseek, | ||
315 | .release = seq_release_private, | ||
316 | }; | ||
317 | |||
318 | /* | ||
319 | * Proportional Set Size(PSS): my share of RSS. | ||
320 | * | ||
321 | * PSS of a process is the count of pages it has in memory, where each | ||
322 | * page is divided by the number of processes sharing it. So if a | ||
323 | * process has 1000 pages all to itself, and 1000 shared with one other | ||
324 | * process, its PSS will be 1500. | ||
325 | * | ||
326 | * To keep (accumulated) division errors low, we adopt a 64bit | ||
327 | * fixed-point pss counter to minimize division errors. So (pss >> | ||
328 | * PSS_SHIFT) would be the real byte count. | ||
329 | * | ||
330 | * A shift of 12 before division means (assuming 4K page size): | ||
331 | * - 1M 3-user-pages add up to 8KB errors; | ||
332 | * - supports mapcount up to 2^24, or 16M; | ||
333 | * - supports PSS up to 2^52 bytes, or 4PB. | ||
334 | */ | ||
335 | #define PSS_SHIFT 12 | ||
336 | |||
337 | struct mem_size_stats | ||
338 | { | ||
339 | struct vm_area_struct *vma; | ||
340 | unsigned long resident; | ||
341 | unsigned long shared_clean; | ||
342 | unsigned long shared_dirty; | ||
343 | unsigned long private_clean; | ||
344 | unsigned long private_dirty; | ||
345 | unsigned long referenced; | ||
346 | u64 pss; | ||
347 | }; | ||
348 | |||
213 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 349 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
214 | void *private) | 350 | void *private) |
215 | { | 351 | { |
@@ -255,33 +391,6 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
255 | return 0; | 391 | return 0; |
256 | } | 392 | } |
257 | 393 | ||
258 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | ||
259 | unsigned long end, void *private) | ||
260 | { | ||
261 | struct vm_area_struct *vma = private; | ||
262 | pte_t *pte, ptent; | ||
263 | spinlock_t *ptl; | ||
264 | struct page *page; | ||
265 | |||
266 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||
267 | for (; addr != end; pte++, addr += PAGE_SIZE) { | ||
268 | ptent = *pte; | ||
269 | if (!pte_present(ptent)) | ||
270 | continue; | ||
271 | |||
272 | page = vm_normal_page(vma, addr, ptent); | ||
273 | if (!page) | ||
274 | continue; | ||
275 | |||
276 | /* Clear accessed and referenced bits. */ | ||
277 | ptep_test_and_clear_young(vma, addr, pte); | ||
278 | ClearPageReferenced(page); | ||
279 | } | ||
280 | pte_unmap_unlock(pte - 1, ptl); | ||
281 | cond_resched(); | ||
282 | return 0; | ||
283 | } | ||
284 | |||
285 | static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; | 394 | static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; |
286 | 395 | ||
287 | static int show_smap(struct seq_file *m, void *v) | 396 | static int show_smap(struct seq_file *m, void *v) |
@@ -321,6 +430,52 @@ static int show_smap(struct seq_file *m, void *v) | |||
321 | return ret; | 430 | return ret; |
322 | } | 431 | } |
323 | 432 | ||
433 | static struct seq_operations proc_pid_smaps_op = { | ||
434 | .start = m_start, | ||
435 | .next = m_next, | ||
436 | .stop = m_stop, | ||
437 | .show = show_smap | ||
438 | }; | ||
439 | |||
440 | static int smaps_open(struct inode *inode, struct file *file) | ||
441 | { | ||
442 | return do_maps_open(inode, file, &proc_pid_smaps_op); | ||
443 | } | ||
444 | |||
445 | const struct file_operations proc_smaps_operations = { | ||
446 | .open = smaps_open, | ||
447 | .read = seq_read, | ||
448 | .llseek = seq_lseek, | ||
449 | .release = seq_release_private, | ||
450 | }; | ||
451 | |||
452 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | ||
453 | unsigned long end, void *private) | ||
454 | { | ||
455 | struct vm_area_struct *vma = private; | ||
456 | pte_t *pte, ptent; | ||
457 | spinlock_t *ptl; | ||
458 | struct page *page; | ||
459 | |||
460 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||
461 | for (; addr != end; pte++, addr += PAGE_SIZE) { | ||
462 | ptent = *pte; | ||
463 | if (!pte_present(ptent)) | ||
464 | continue; | ||
465 | |||
466 | page = vm_normal_page(vma, addr, ptent); | ||
467 | if (!page) | ||
468 | continue; | ||
469 | |||
470 | /* Clear accessed and referenced bits. */ | ||
471 | ptep_test_and_clear_young(vma, addr, pte); | ||
472 | ClearPageReferenced(page); | ||
473 | } | ||
474 | pte_unmap_unlock(pte - 1, ptl); | ||
475 | cond_resched(); | ||
476 | return 0; | ||
477 | } | ||
478 | |||
324 | static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; | 479 | static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; |
325 | 480 | ||
326 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 481 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
@@ -364,147 +519,6 @@ const struct file_operations proc_clear_refs_operations = { | |||
364 | .write = clear_refs_write, | 519 | .write = clear_refs_write, |
365 | }; | 520 | }; |
366 | 521 | ||
367 | static void *m_start(struct seq_file *m, loff_t *pos) | ||
368 | { | ||
369 | struct proc_maps_private *priv = m->private; | ||
370 | unsigned long last_addr = m->version; | ||
371 | struct mm_struct *mm; | ||
372 | struct vm_area_struct *vma, *tail_vma = NULL; | ||
373 | loff_t l = *pos; | ||
374 | |||
375 | /* Clear the per syscall fields in priv */ | ||
376 | priv->task = NULL; | ||
377 | priv->tail_vma = NULL; | ||
378 | |||
379 | /* | ||
380 | * We remember last_addr rather than next_addr to hit with | ||
381 | * mmap_cache most of the time. We have zero last_addr at | ||
382 | * the beginning and also after lseek. We will have -1 last_addr | ||
383 | * after the end of the vmas. | ||
384 | */ | ||
385 | |||
386 | if (last_addr == -1UL) | ||
387 | return NULL; | ||
388 | |||
389 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | ||
390 | if (!priv->task) | ||
391 | return NULL; | ||
392 | |||
393 | mm = mm_for_maps(priv->task); | ||
394 | if (!mm) | ||
395 | return NULL; | ||
396 | |||
397 | priv->tail_vma = tail_vma = get_gate_vma(priv->task); | ||
398 | |||
399 | /* Start with last addr hint */ | ||
400 | if (last_addr && (vma = find_vma(mm, last_addr))) { | ||
401 | vma = vma->vm_next; | ||
402 | goto out; | ||
403 | } | ||
404 | |||
405 | /* | ||
406 | * Check the vma index is within the range and do | ||
407 | * sequential scan until m_index. | ||
408 | */ | ||
409 | vma = NULL; | ||
410 | if ((unsigned long)l < mm->map_count) { | ||
411 | vma = mm->mmap; | ||
412 | while (l-- && vma) | ||
413 | vma = vma->vm_next; | ||
414 | goto out; | ||
415 | } | ||
416 | |||
417 | if (l != mm->map_count) | ||
418 | tail_vma = NULL; /* After gate vma */ | ||
419 | |||
420 | out: | ||
421 | if (vma) | ||
422 | return vma; | ||
423 | |||
424 | /* End of vmas has been reached */ | ||
425 | m->version = (tail_vma != NULL)? 0: -1UL; | ||
426 | up_read(&mm->mmap_sem); | ||
427 | mmput(mm); | ||
428 | return tail_vma; | ||
429 | } | ||
430 | |||
431 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) | ||
432 | { | ||
433 | if (vma && vma != priv->tail_vma) { | ||
434 | struct mm_struct *mm = vma->vm_mm; | ||
435 | up_read(&mm->mmap_sem); | ||
436 | mmput(mm); | ||
437 | } | ||
438 | } | ||
439 | |||
440 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | ||
441 | { | ||
442 | struct proc_maps_private *priv = m->private; | ||
443 | struct vm_area_struct *vma = v; | ||
444 | struct vm_area_struct *tail_vma = priv->tail_vma; | ||
445 | |||
446 | (*pos)++; | ||
447 | if (vma && (vma != tail_vma) && vma->vm_next) | ||
448 | return vma->vm_next; | ||
449 | vma_stop(priv, vma); | ||
450 | return (vma != tail_vma)? tail_vma: NULL; | ||
451 | } | ||
452 | |||
453 | static void m_stop(struct seq_file *m, void *v) | ||
454 | { | ||
455 | struct proc_maps_private *priv = m->private; | ||
456 | struct vm_area_struct *vma = v; | ||
457 | |||
458 | vma_stop(priv, vma); | ||
459 | if (priv->task) | ||
460 | put_task_struct(priv->task); | ||
461 | } | ||
462 | |||
463 | static struct seq_operations proc_pid_maps_op = { | ||
464 | .start = m_start, | ||
465 | .next = m_next, | ||
466 | .stop = m_stop, | ||
467 | .show = show_map | ||
468 | }; | ||
469 | |||
470 | static struct seq_operations proc_pid_smaps_op = { | ||
471 | .start = m_start, | ||
472 | .next = m_next, | ||
473 | .stop = m_stop, | ||
474 | .show = show_smap | ||
475 | }; | ||
476 | |||
477 | static int do_maps_open(struct inode *inode, struct file *file, | ||
478 | struct seq_operations *ops) | ||
479 | { | ||
480 | struct proc_maps_private *priv; | ||
481 | int ret = -ENOMEM; | ||
482 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
483 | if (priv) { | ||
484 | priv->pid = proc_pid(inode); | ||
485 | ret = seq_open(file, ops); | ||
486 | if (!ret) { | ||
487 | struct seq_file *m = file->private_data; | ||
488 | m->private = priv; | ||
489 | } else { | ||
490 | kfree(priv); | ||
491 | } | ||
492 | } | ||
493 | return ret; | ||
494 | } | ||
495 | |||
496 | static int maps_open(struct inode *inode, struct file *file) | ||
497 | { | ||
498 | return do_maps_open(inode, file, &proc_pid_maps_op); | ||
499 | } | ||
500 | |||
501 | const struct file_operations proc_maps_operations = { | ||
502 | .open = maps_open, | ||
503 | .read = seq_read, | ||
504 | .llseek = seq_lseek, | ||
505 | .release = seq_release_private, | ||
506 | }; | ||
507 | |||
508 | #ifdef CONFIG_NUMA | 522 | #ifdef CONFIG_NUMA |
509 | extern int show_numa_map(struct seq_file *m, void *v); | 523 | extern int show_numa_map(struct seq_file *m, void *v); |
510 | 524 | ||
@@ -539,14 +553,3 @@ const struct file_operations proc_numa_maps_operations = { | |||
539 | }; | 553 | }; |
540 | #endif | 554 | #endif |
541 | 555 | ||
542 | static int smaps_open(struct inode *inode, struct file *file) | ||
543 | { | ||
544 | return do_maps_open(inode, file, &proc_pid_smaps_op); | ||
545 | } | ||
546 | |||
547 | const struct file_operations proc_smaps_operations = { | ||
548 | .open = smaps_open, | ||
549 | .read = seq_read, | ||
550 | .llseek = seq_lseek, | ||
551 | .release = seq_release_private, | ||
552 | }; | ||