diff options
author | David Rientjes <rientjes@google.com> | 2007-05-06 17:49:24 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-07 15:12:52 -0400 |
commit | b813e931b4c8235bb42e301096ea97dbdee3e8fe (patch) | |
tree | f8182687bffe8e3b95bac69b2cc7fdfe674ddc53 | |
parent | f79f177c25016647cc92ffac8afa7cb96ce47011 (diff) |
smaps: add clear_refs file to clear reference
Adds /proc/pid/clear_refs. When any non-zero number is written to this file,
ptep_test_and_clear_young() and ClearPageReferenced() are called for each pte
and its corresponding page, respectively, in that task's VMAs. This file is
only writable by the user who owns the task.
It is now possible to measure _approximately_ how much memory a task is using
by clearing the reference bits with
echo 1 > /proc/pid/clear_refs
and checking the referenced size reported for each VMA in the /proc/pid/smaps
output after a measured time interval. For example, to observe the approximate
change in memory footprint for a task, write a script that clears the references
(echo 1 > /proc/pid/clear_refs), sleeps, and then greps for the Referenced field
(named Pgs_Referenced before this patch) and extracts the size in kB. Add the
sizes for each VMA together for the total referenced footprint. Moments later,
repeat the process and observe the difference.
For example, using an efficient Mozilla:
accumulated time    referenced memory
----------------    -----------------
             0 s               408 kB
             1 s               408 kB
             2 s               556 kB
             3 s              1028 kB
             4 s               872 kB
             5 s              1956 kB
             6 s               416 kB
             7 s              1560 kB
             8 s              2336 kB
             9 s              1044 kB
            10 s               416 kB
This is a valuable tool to get an approximate measurement of the memory
footprint for a task.
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
[akpm@linux-foundation.org: build fixes]
[mpm@selenic.com: rename for_each_pmd]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/filesystems/proc.txt | 31 | ||||
-rw-r--r-- | fs/proc/base.c | 36 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 79 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 1 |
4 files changed, 116 insertions, 31 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 7aaf09b86a55..3f4b226572e7 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -122,21 +122,22 @@ subdirectory has the entries listed in Table 1-1. | |||
122 | 122 | ||
123 | Table 1-1: Process specific entries in /proc | 123 | Table 1-1: Process specific entries in /proc |
124 | .............................................................................. | 124 | .............................................................................. |
125 | File Content | 125 | File Content |
126 | cmdline Command line arguments | 126 | clear_refs Clears page referenced bits shown in smaps output |
127 | cpu Current and last cpu in which it was executed (2.4)(smp) | 127 | cmdline Command line arguments |
128 | cwd Link to the current working directory | 128 | cpu Current and last cpu in which it was executed (2.4)(smp) |
129 | environ Values of environment variables | 129 | cwd Link to the current working directory |
130 | exe Link to the executable of this process | 130 | environ Values of environment variables |
131 | fd Directory, which contains all file descriptors | 131 | exe Link to the executable of this process |
132 | maps Memory maps to executables and library files (2.4) | 132 | fd Directory, which contains all file descriptors |
133 | mem Memory held by this process | 133 | maps Memory maps to executables and library files (2.4) |
134 | root Link to the root directory of this process | 134 | mem Memory held by this process |
135 | stat Process status | 135 | root Link to the root directory of this process |
136 | statm Process memory status information | 136 | stat Process status |
137 | status Process status in human readable form | 137 | statm Process memory status information |
138 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan | 138 | status Process status in human readable form |
139 | smaps Extension based on maps, presenting the rss size for each mapped file | 139 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan |
140 | smaps Extension based on maps, the rss size for each mapped file | ||
140 | .............................................................................. | 141 | .............................................................................. |
141 | 142 | ||
142 | For example, to get the status information of a process, all you have to do is | 143 | For example, to get the status information of a process, all you have to do is |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 989af5e55d1b..ec158dd02b3a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -715,6 +715,40 @@ static const struct file_operations proc_oom_adjust_operations = { | |||
715 | .write = oom_adjust_write, | 715 | .write = oom_adjust_write, |
716 | }; | 716 | }; |
717 | 717 | ||
718 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | ||
719 | size_t count, loff_t *ppos) | ||
720 | { | ||
721 | struct task_struct *task; | ||
722 | char buffer[PROC_NUMBUF], *end; | ||
723 | struct mm_struct *mm; | ||
724 | |||
725 | memset(buffer, 0, sizeof(buffer)); | ||
726 | if (count > sizeof(buffer) - 1) | ||
727 | count = sizeof(buffer) - 1; | ||
728 | if (copy_from_user(buffer, buf, count)) | ||
729 | return -EFAULT; | ||
730 | if (!simple_strtol(buffer, &end, 0)) | ||
731 | return -EINVAL; | ||
732 | if (*end == '\n') | ||
733 | end++; | ||
734 | task = get_proc_task(file->f_path.dentry->d_inode); | ||
735 | if (!task) | ||
736 | return -ESRCH; | ||
737 | mm = get_task_mm(task); | ||
738 | if (mm) { | ||
739 | clear_refs_smap(mm); | ||
740 | mmput(mm); | ||
741 | } | ||
742 | put_task_struct(task); | ||
743 | if (end - buffer == 0) | ||
744 | return -EIO; | ||
745 | return end - buffer; | ||
746 | } | ||
747 | |||
748 | static struct file_operations proc_clear_refs_operations = { | ||
749 | .write = clear_refs_write, | ||
750 | }; | ||
751 | |||
718 | #ifdef CONFIG_AUDITSYSCALL | 752 | #ifdef CONFIG_AUDITSYSCALL |
719 | #define TMPBUFLEN 21 | 753 | #define TMPBUFLEN 21 |
720 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | 754 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, |
@@ -1851,6 +1885,7 @@ static struct pid_entry tgid_base_stuff[] = { | |||
1851 | REG("mounts", S_IRUGO, mounts), | 1885 | REG("mounts", S_IRUGO, mounts), |
1852 | REG("mountstats", S_IRUSR, mountstats), | 1886 | REG("mountstats", S_IRUSR, mountstats), |
1853 | #ifdef CONFIG_MMU | 1887 | #ifdef CONFIG_MMU |
1888 | REG("clear_refs", S_IWUSR, clear_refs), | ||
1854 | REG("smaps", S_IRUGO, smaps), | 1889 | REG("smaps", S_IRUGO, smaps), |
1855 | #endif | 1890 | #endif |
1856 | #ifdef CONFIG_SECURITY | 1891 | #ifdef CONFIG_SECURITY |
@@ -2132,6 +2167,7 @@ static struct pid_entry tid_base_stuff[] = { | |||
2132 | LNK("exe", exe), | 2167 | LNK("exe", exe), |
2133 | REG("mounts", S_IRUGO, mounts), | 2168 | REG("mounts", S_IRUGO, mounts), |
2134 | #ifdef CONFIG_MMU | 2169 | #ifdef CONFIG_MMU |
2170 | REG("clear_refs", S_IWUSR, clear_refs), | ||
2135 | REG("smaps", S_IRUGO, smaps), | 2171 | REG("smaps", S_IRUGO, smaps), |
2136 | #endif | 2172 | #endif |
2137 | #ifdef CONFIG_SECURITY | 2173 | #ifdef CONFIG_SECURITY |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 199088ee969b..4008c060f7ef 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -195,7 +195,7 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats | |||
195 | "Shared_Dirty: %8lu kB\n" | 195 | "Shared_Dirty: %8lu kB\n" |
196 | "Private_Clean: %8lu kB\n" | 196 | "Private_Clean: %8lu kB\n" |
197 | "Private_Dirty: %8lu kB\n" | 197 | "Private_Dirty: %8lu kB\n" |
198 | "Pgs_Referenced: %8lu kB\n", | 198 | "Referenced: %8lu kB\n", |
199 | (vma->vm_end - vma->vm_start) >> 10, | 199 | (vma->vm_end - vma->vm_start) >> 10, |
200 | mss->resident >> 10, | 200 | mss->resident >> 10, |
201 | mss->shared_clean >> 10, | 201 | mss->shared_clean >> 10, |
@@ -214,9 +214,9 @@ static int show_map(struct seq_file *m, void *v) | |||
214 | return show_map_internal(m, v, NULL); | 214 | return show_map_internal(m, v, NULL); |
215 | } | 215 | } |
216 | 216 | ||
217 | static void smaps_one_pmd(struct vm_area_struct *vma, pmd_t *pmd, | 217 | static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
218 | unsigned long addr, unsigned long end, | 218 | unsigned long addr, unsigned long end, |
219 | void *private) | 219 | void *private) |
220 | { | 220 | { |
221 | struct mem_size_stats *mss = private; | 221 | struct mem_size_stats *mss = private; |
222 | pte_t *pte, ptent; | 222 | pte_t *pte, ptent; |
@@ -254,8 +254,34 @@ static void smaps_one_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
254 | cond_resched(); | 254 | cond_resched(); |
255 | } | 255 | } |
256 | 256 | ||
257 | static inline void for_each_pmd_in_pud(struct pmd_walker *walker, pud_t *pud, | 257 | static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
258 | unsigned long addr, unsigned long end) | 258 | unsigned long addr, unsigned long end, |
259 | void *private) | ||
260 | { | ||
261 | pte_t *pte, ptent; | ||
262 | spinlock_t *ptl; | ||
263 | struct page *page; | ||
264 | |||
265 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||
266 | for (; addr != end; pte++, addr += PAGE_SIZE) { | ||
267 | ptent = *pte; | ||
268 | if (!pte_present(ptent)) | ||
269 | continue; | ||
270 | |||
271 | page = vm_normal_page(vma, addr, ptent); | ||
272 | if (!page) | ||
273 | continue; | ||
274 | |||
275 | /* Clear accessed and referenced bits. */ | ||
276 | ptep_test_and_clear_young(vma, addr, pte); | ||
277 | ClearPageReferenced(page); | ||
278 | } | ||
279 | pte_unmap_unlock(pte - 1, ptl); | ||
280 | cond_resched(); | ||
281 | } | ||
282 | |||
283 | static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, | ||
284 | unsigned long addr, unsigned long end) | ||
259 | { | 285 | { |
260 | pmd_t *pmd; | 286 | pmd_t *pmd; |
261 | unsigned long next; | 287 | unsigned long next; |
@@ -269,8 +295,8 @@ static inline void for_each_pmd_in_pud(struct pmd_walker *walker, pud_t *pud, | |||
269 | } | 295 | } |
270 | } | 296 | } |
271 | 297 | ||
272 | static inline void for_each_pud_in_pgd(struct pmd_walker *walker, pgd_t *pgd, | 298 | static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, |
273 | unsigned long addr, unsigned long end) | 299 | unsigned long addr, unsigned long end) |
274 | { | 300 | { |
275 | pud_t *pud; | 301 | pud_t *pud; |
276 | unsigned long next; | 302 | unsigned long next; |
@@ -280,15 +306,24 @@ static inline void for_each_pud_in_pgd(struct pmd_walker *walker, pgd_t *pgd, | |||
280 | next = pud_addr_end(addr, end); | 306 | next = pud_addr_end(addr, end); |
281 | if (pud_none_or_clear_bad(pud)) | 307 | if (pud_none_or_clear_bad(pud)) |
282 | continue; | 308 | continue; |
283 | for_each_pmd_in_pud(walker, pud, addr, next); | 309 | walk_pmd_range(walker, pud, addr, next); |
284 | } | 310 | } |
285 | } | 311 | } |
286 | 312 | ||
287 | static inline void for_each_pmd(struct vm_area_struct *vma, | 313 | /* |
288 | void (*action)(struct vm_area_struct *, pmd_t *, | 314 | * walk_page_range - walk the page tables of a VMA with a callback |
289 | unsigned long, unsigned long, | 315 | * @vma - VMA to walk |
290 | void *), | 316 | * @action - callback invoked for every bottom-level (PTE) page table |
291 | void *private) | 317 | * @private - private data passed to the callback function |
318 | * | ||
319 | * Recursively walk the page table for the memory area in a VMA, calling | ||
320 | * a callback for every bottom-level (PTE) page table. | ||
321 | */ | ||
322 | static inline void walk_page_range(struct vm_area_struct *vma, | ||
323 | void (*action)(struct vm_area_struct *, | ||
324 | pmd_t *, unsigned long, | ||
325 | unsigned long, void *), | ||
326 | void *private) | ||
292 | { | 327 | { |
293 | unsigned long addr = vma->vm_start; | 328 | unsigned long addr = vma->vm_start; |
294 | unsigned long end = vma->vm_end; | 329 | unsigned long end = vma->vm_end; |
@@ -305,7 +340,7 @@ static inline void for_each_pmd(struct vm_area_struct *vma, | |||
305 | next = pgd_addr_end(addr, end); | 340 | next = pgd_addr_end(addr, end); |
306 | if (pgd_none_or_clear_bad(pgd)) | 341 | if (pgd_none_or_clear_bad(pgd)) |
307 | continue; | 342 | continue; |
308 | for_each_pud_in_pgd(&walker, pgd, addr, next); | 343 | walk_pud_range(&walker, pgd, addr, next); |
309 | } | 344 | } |
310 | } | 345 | } |
311 | 346 | ||
@@ -316,10 +351,22 @@ static int show_smap(struct seq_file *m, void *v) | |||
316 | 351 | ||
317 | memset(&mss, 0, sizeof mss); | 352 | memset(&mss, 0, sizeof mss); |
318 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 353 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
319 | for_each_pmd(vma, smaps_one_pmd, &mss); | 354 | walk_page_range(vma, smaps_pte_range, &mss); |
320 | return show_map_internal(m, v, &mss); | 355 | return show_map_internal(m, v, &mss); |
321 | } | 356 | } |
322 | 357 | ||
358 | void clear_refs_smap(struct mm_struct *mm) | ||
359 | { | ||
360 | struct vm_area_struct *vma; | ||
361 | |||
362 | down_read(&mm->mmap_sem); | ||
363 | for (vma = mm->mmap; vma; vma = vma->vm_next) | ||
364 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | ||
365 | walk_page_range(vma, clear_refs_pte_range, NULL); | ||
366 | flush_tlb_mm(mm); | ||
367 | up_read(&mm->mmap_sem); | ||
368 | } | ||
369 | |||
323 | static void *m_start(struct seq_file *m, loff_t *pos) | 370 | static void *m_start(struct seq_file *m, loff_t *pos) |
324 | { | 371 | { |
325 | struct proc_maps_private *priv = m->private; | 372 | struct proc_maps_private *priv = m->private; |
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index be4652a0545a..f4f7a63cae1f 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h | |||
@@ -104,6 +104,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | |||
104 | unsigned long task_vsize(struct mm_struct *); | 104 | unsigned long task_vsize(struct mm_struct *); |
105 | int task_statm(struct mm_struct *, int *, int *, int *, int *); | 105 | int task_statm(struct mm_struct *, int *, int *, int *, int *); |
106 | char *task_mem(struct mm_struct *, char *); | 106 | char *task_mem(struct mm_struct *, char *); |
107 | void clear_refs_smap(struct mm_struct *mm); | ||
107 | 108 | ||
108 | extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, | 109 | extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, |
109 | struct proc_dir_entry *parent); | 110 | struct proc_dir_entry *parent); |