author     Pavel Emelyanov <xemul@parallels.com>           2013-07-03 18:01:20 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-07-03 19:07:26 -0400
commit     0f8975ec4db2c8b5bd111b211292ca9be0feb6b8 (patch)
tree       47bb0acc9fc3e783ad9cf33097a6636190f5e42b /fs
parent     2b0a9f017548f05e42fbf7e67c4a626c1ebd5e12 (diff)
mm: soft-dirty bits for user memory changes tracking
The soft-dirty is a bit on a PTE which helps to track which pages a task
writes to. In order to do this tracking one should

  1. Clear the soft-dirty bits from the task's PTEs ("echo 4 > /proc/PID/clear_refs");
  2. Wait some time;
  3. Read the soft-dirty bits back (the 55th bit in each /proc/PID/pagemap2 entry).

To make this tracking work, the writable bit is cleared from a PTE whenever
its soft-dirty bit is. Thus, when the task later tries to modify a page at
some virtual address, a #PF occurs and the kernel sets the soft-dirty bit on
the respective PTE.

Note that although the whole of the task's address space is marked r/o after
the soft-dirty bits are cleared, the #PF-s that occur afterwards are processed
quickly. This is because the pages are still mapped to physical memory, so all
the kernel has to do is find this fact out and put the writable, dirty and
soft-dirty bits back on the PTE.

Another thing to note is that when mremap moves PTEs, they are marked
soft-dirty as well, since from the user's perspective mremap modifies the
virtual memory at its new address.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
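As an illustration of the three-step procedure above, here is a minimal
userspace sketch of one tracking cycle. It is not part of this patch: it
assumes a kernel built with CONFIG_MEM_SOFT_DIRTY that exposes the
/proc/PID/pagemap2 file from this series (kernels that instead expose
soft-dirty through the regular pagemap file use the same bit-55 test), and
error handling is omitted for brevity.

/*
 * Hypothetical demo, not part of this patch: track writes to one page
 * via the clear_refs/pagemap2 interface described in the changelog.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define SOFT_DIRTY_BIT 55	/* per the changelog, soft-dirty is bit 55 */

/* Read the 64-bit pagemap2 entry covering vaddr and test its soft-dirty bit. */
static int page_is_soft_dirty(int pagemap_fd, unsigned long vaddr)
{
	uint64_t entry;
	long psize = sysconf(_SC_PAGESIZE);
	off_t off = (off_t)(vaddr / psize) * sizeof(entry);

	if (pread(pagemap_fd, &entry, sizeof(entry), off) != sizeof(entry))
		return -1;
	return (int)((entry >> SOFT_DIRTY_BIT) & 1);
}

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	char *page = aligned_alloc(psize, psize);
	int clear_fd, map_fd;

	page[0] = 1;	/* fault the page in before tracking starts */

	/* Step 1: clear soft-dirty bits; this also write-protects the PTEs. */
	clear_fd = open("/proc/self/clear_refs", O_WRONLY);
	write(clear_fd, "4", 1);
	close(clear_fd);

	map_fd = open("/proc/self/pagemap2", O_RDONLY);
	printf("before write: soft-dirty = %d\n",
	       page_is_soft_dirty(map_fd, (unsigned long)page));

	/* Step 2: write to the page; the resulting #PF re-sets soft-dirty. */
	page[0] = 2;

	/* Step 3: read the bit back; it should now be 1. */
	printf("after write:  soft-dirty = %d\n",
	       page_is_soft_dirty(map_fd, (unsigned long)page));

	close(map_fd);
	free(page);
	return 0;
}

The write of "4" selects CLEAR_REFS_SOFT_DIRTY (added below), which both
clears the soft-dirty bits and write-protects the PTEs, so the page[0] = 2
store faults and the kernel re-marks that PTE soft-dirty, which the second
pagemap2 read observes.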
Diffstat (limited to 'fs')
-rw-r--r--  fs/proc/task_mmu.c  47
1 file changed, 42 insertions, 5 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 39d641292579..a18e065c1c3e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -11,6 +11,7 @@
 #include <linux/rmap.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
@@ -692,13 +693,32 @@ enum clear_refs_types {
 	CLEAR_REFS_ALL = 1,
 	CLEAR_REFS_ANON,
 	CLEAR_REFS_MAPPED,
+	CLEAR_REFS_SOFT_DIRTY,
 	CLEAR_REFS_LAST,
 };
 
 struct clear_refs_private {
 	struct vm_area_struct *vma;
+	enum clear_refs_types type;
 };
 
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *pte)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	/*
+	 * The soft-dirty tracker uses #PF-s to catch writes
+	 * to pages, so write-protect the pte as well. See the
+	 * Documentation/vm/soft-dirty.txt for full description
+	 * of how soft-dirty works.
+	 */
+	pte_t ptent = *pte;
+	ptent = pte_wrprotect(ptent);
+	ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+	set_pte_at(vma->vm_mm, addr, pte, ptent);
+#endif
+}
+
 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 {
@@ -718,6 +738,11 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 		if (!pte_present(ptent))
 			continue;
 
+		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+			clear_soft_dirty(vma, addr, pte);
+			continue;
+		}
+
 		page = vm_normal_page(vma, addr, ptent);
 		if (!page)
 			continue;
@@ -759,6 +784,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	mm = get_task_mm(task);
 	if (mm) {
 		struct clear_refs_private cp = {
+			.type = type,
 		};
 		struct mm_walk clear_refs_walk = {
 			.pmd_entry = clear_refs_pte_range,
@@ -766,6 +792,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			.private = &cp,
 		};
 		down_read(&mm->mmap_sem);
+		if (type == CLEAR_REFS_SOFT_DIRTY)
+			mmu_notifier_invalidate_range_start(mm, 0, -1);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			cp.vma = vma;
 			if (is_vm_hugetlb_page(vma))
@@ -786,6 +814,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			walk_page_range(vma->vm_start, vma->vm_end,
 					&clear_refs_walk);
 		}
+		if (type == CLEAR_REFS_SOFT_DIRTY)
+			mmu_notifier_invalidate_range_end(mm, 0, -1);
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
@@ -827,6 +857,7 @@ struct pagemapread {
 /* in "new" pagemap pshift bits are occupied with more status bits */
 #define PM_STATUS2(v2, x)	(__PM_PSHIFT(v2 ? x : PAGE_SHIFT))
 
+#define __PM_SOFT_DIRTY      (1LL)
 #define PM_PRESENT          PM_STATUS(4LL)
 #define PM_SWAP             PM_STATUS(2LL)
 #define PM_FILE             PM_STATUS(1LL)
@@ -868,6 +899,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 {
 	u64 frame, flags;
 	struct page *page = NULL;
+	int flags2 = 0;
 
 	if (pte_present(pte)) {
 		frame = pte_pfn(pte);
@@ -888,13 +920,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
+	if (pte_soft_dirty(pte))
+		flags2 |= __PM_SOFT_DIRTY;
 
-	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, 0) | flags);
+	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
-		pmd_t pmd, int offset)
+		pmd_t pmd, int offset, int pmd_flags2)
 {
 	/*
 	 * Currently pmd for thp is always present because thp can not be
@@ -903,13 +937,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p
 	 */
 	if (pmd_present(pmd))
 		*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
-				| PM_STATUS2(pm->v2, 0) | PM_PRESENT);
+				| PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
 	else
 		*pme = make_pme(PM_NOT_PRESENT(pm->v2));
 }
 #else
 static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
-		pmd_t pmd, int offset)
+		pmd_t pmd, int offset, int pmd_flags2)
 {
 }
 #endif
@@ -926,12 +960,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
 	if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+		int pmd_flags2;
+
+		pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
 		for (; addr != end; addr += PAGE_SIZE) {
 			unsigned long offset;
 
 			offset = (addr & ~PAGEMAP_WALK_MASK) >>
 					PAGE_SHIFT;
-			thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset);
+			thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
 			err = add_to_pagemap(addr, &pme, pm);
 			if (err)
 				break;
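The PM_* macros touched above pack status flags into the top of each 64-bit
pagemap entry, with __PM_SOFT_DIRTY placed via PM_STATUS2 into the old pshift
field. As a hedged sketch of how a consumer could decode an entry, assuming
the documented pagemap bit layout (bit 63 present, bit 62 swapped, bit 61
file-page/shared-anon, bits 0-54 PFN) plus the bit-55 soft-dirty flag this
patch adds in the v2 (pagemap2) format; the PME_* names are hypothetical, not
kernel macros:

/* Illustrative decoder for one 64-bit pagemap2 entry. */
#include <stdint.h>
#include <stdio.h>

#define PME_PRESENT    (1ULL << 63)
#define PME_SWAP       (1ULL << 62)
#define PME_FILE       (1ULL << 61)
#define PME_SOFT_DIRTY (1ULL << 55)	/* v2 layout only, added by this patch */
#define PME_PFN_MASK   ((1ULL << 55) - 1)	/* PFN, valid when present */

static void dump_entry(uint64_t e)
{
	printf("present=%d swap=%d file=%d soft-dirty=%d pfn=0x%llx\n",
	       !!(e & PME_PRESENT), !!(e & PME_SWAP), !!(e & PME_FILE),
	       !!(e & PME_SOFT_DIRTY),
	       (unsigned long long)(e & PME_PFN_MASK));
}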