aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
author Konstantin Khlebnikov <khlebnikov@openvz.org> 2012-05-31 19:26:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-05-31 20:49:29 -0400
commit 052fb0d635df5d49dfc85687d94e1a87bf09378d (patch)
tree 2cde635970974b44a7363e5e940d38f20de46d39 /fs/proc
parent 715be1fce0d964aca15618b24f6f415f3cbd03c8 (diff)
proc: report file/anon bit in /proc/pid/pagemap
This is an implementation of Andrew's proposal to extend the pagemap file bits to report what is missing about tasks' working set. The problem with the working set detection is multilateral. In the criu (checkpoint/restore) project we dump the tasks' memory into image files and to do it properly we need to detect which pages inside mappings are really in use. The mincore syscall, which I thought could help with this, did not. First, it doesn't report swapped pages, thus we cannot find out which parts of anonymous mappings to dump. Next, it does report pages from page cache as present even if they are not mapped, and it doesn't distinguish pages that have been cow-ed from those that have not. Note that the issue with swap pages is critical -- we must dump swap pages to the image file. But the issues with file pages are an optimization -- we can take all file pages into the image, and this would be correct, but if we know that a page is not mapped or not cow-ed, we can remove it from the dump file. The dump would still be self-consistent, though significantly smaller in size (up to 10 times smaller on real apps). Andrew noticed that the proc pagemap file solves 2 of the 3 issues above -- it reports whether a page is present or swapped and it doesn't report unmapped page cache pages. But it doesn't distinguish cow-ed file pages from not cow-ed ones. I would like to use the last unused bit in this file to report whether the page mapped into the respective pte is PageAnon or not. [comment stolen from Pavel Emelyanov's v1 patch] Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Matt Mackall <mpm@selenic.com> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
-rw-r--r-- fs/proc/task_mmu.c 48
1 files changed, 30 insertions, 18 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 83ede6e3b5ec..02476f5889f1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -700,6 +700,7 @@ struct pagemapread {
700 700
701#define PM_PRESENT PM_STATUS(4LL) 701#define PM_PRESENT PM_STATUS(4LL)
702#define PM_SWAP PM_STATUS(2LL) 702#define PM_SWAP PM_STATUS(2LL)
703#define PM_FILE PM_STATUS(1LL)
703#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) 704#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
704#define PM_END_OF_BUFFER 1 705#define PM_END_OF_BUFFER 1
705 706
@@ -733,22 +734,33 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
733 return err; 734 return err;
734} 735}
735 736
736static u64 swap_pte_to_pagemap_entry(pte_t pte) 737static void pte_to_pagemap_entry(pagemap_entry_t *pme,
738 struct vm_area_struct *vma, unsigned long addr, pte_t pte)
737{ 739{
738 swp_entry_t e = pte_to_swp_entry(pte); 740 u64 frame, flags;
739 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 741 struct page *page = NULL;
740} 742
741 743 if (pte_present(pte)) {
742static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte) 744 frame = pte_pfn(pte);
743{ 745 flags = PM_PRESENT;
744 if (is_swap_pte(pte)) 746 page = vm_normal_page(vma, addr, pte);
745 *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 747 } else if (is_swap_pte(pte)) {
746 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP); 748 swp_entry_t entry = pte_to_swp_entry(pte);
747 else if (pte_present(pte)) 749
748 *pme = make_pme(PM_PFRAME(pte_pfn(pte)) 750 frame = swp_type(entry) |
749 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); 751 (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
750 else 752 flags = PM_SWAP;
753 if (is_migration_entry(entry))
754 page = migration_entry_to_page(entry);
755 } else {
751 *pme = make_pme(PM_NOT_PRESENT); 756 *pme = make_pme(PM_NOT_PRESENT);
757 return;
758 }
759
760 if (page && !PageAnon(page))
761 flags |= PM_FILE;
762
763 *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags);
752} 764}
753 765
754#ifdef CONFIG_TRANSPARENT_HUGEPAGE 766#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -815,7 +827,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
815 if (vma && (vma->vm_start <= addr) && 827 if (vma && (vma->vm_start <= addr) &&
816 !is_vm_hugetlb_page(vma)) { 828 !is_vm_hugetlb_page(vma)) {
817 pte = pte_offset_map(pmd, addr); 829 pte = pte_offset_map(pmd, addr);
818 pte_to_pagemap_entry(&pme, *pte); 830 pte_to_pagemap_entry(&pme, vma, addr, *pte);
819 /* unmap before userspace copy */ 831 /* unmap before userspace copy */
820 pte_unmap(pte); 832 pte_unmap(pte);
821 } 833 }
@@ -869,11 +881,11 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
869 * For each page in the address space, this file contains one 64-bit entry 881 * For each page in the address space, this file contains one 64-bit entry
870 * consisting of the following: 882 * consisting of the following:
871 * 883 *
872 * Bits 0-55 page frame number (PFN) if present 884 * Bits 0-54 page frame number (PFN) if present
873 * Bits 0-4 swap type if swapped 885 * Bits 0-4 swap type if swapped
874 * Bits 5-55 swap offset if swapped 886 * Bits 5-54 swap offset if swapped
875 * Bits 55-60 page shift (page size = 1<<page shift) 887 * Bits 55-60 page shift (page size = 1<<page shift)
876 * Bit 61 reserved for future use 888 * Bit 61 page is file-page or shared-anon
877 * Bit 62 page swapped 889 * Bit 62 page swapped
878 * Bit 63 page present 890 * Bit 63 page present
879 * 891 *