aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Pavel Emelyanov <xemul@parallels.com> 2013-07-03 18:01:16 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-07-03 19:07:25 -0400
commit: 040fa02077de01c7e08fa75be6125e4ca5636011 (patch)
tree: eca8f59f428d9b9b1d57c175509c786208e1e793 /fs
parent: 1c8fca1d92e14859159a82b8a380d220139b7344 (diff)
clear_refs: sanitize accepted commands declaration
This is the implementation of the soft-dirty bit concept that should help keep track of changes in user memory, which in turn is badly needed by the checkpoint-restore project (http://criu.org). To create a dump of one or more applications we save all the information about them to files, and the biggest part of such a dump is the contents of the tasks' memory. However, there are usage scenarios where it's not required to get _all_ the task memory while creating a dump. For example, when doing periodic dumps, it's only required to take a full memory dump at the first step and then take incremental changes of memory. Another example is live migration. We copy all the memory to the destination node without stopping the tasks, then stop them, check which pages have changed, dump those pages and the rest of the state, then copy that to the destination node. This decreases freeze time significantly. That said, some help from the kernel to watch how processes modify the contents of their memory is required. The proposal is to track changes with the help of a new soft-dirty bit this way: 1. First do "echo 4 > /proc/$pid/clear_refs". At that point the kernel clears the soft-dirty _and_ the writable bits from all ptes of process $pid. From now on every write to any page will result in a #pf and a subsequent call to pte_mkdirty/pmd_mkdirty, which in turn will set the soft-dirty flag. 2. Then read /proc/$pid/pagemap2 and check the soft-dirty bit reported there (the 55th one). If set, the respective pte was written to since the last call to clear refs. The soft-dirty bit is the _PAGE_BIT_HIDDEN one. Although that bit is also used by kmemcheck, kmemcheck marks kernel pages with it, while the soft-dirty bit is put on user pages, so they do not conflict with each other. This patch: A new clear-refs type will be added in the next patch, so prepare the code for that.
[akpm@linux-foundation.org: don't assume that sizeof(enum clear_refs_types) == sizeof(int)] Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Cc: Matt Mackall <mpm@selenic.com> Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Cc: Glauber Costa <glommer@parallels.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/proc/task_mmu.c 19
1 files changed, 12 insertions, 7 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3e636d864d56..dad0809db551 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -688,6 +688,13 @@ const struct file_operations proc_tid_smaps_operations = {
688 .release = seq_release_private, 688 .release = seq_release_private,
689}; 689};
690 690
691enum clear_refs_types {
692 CLEAR_REFS_ALL = 1,
693 CLEAR_REFS_ANON,
694 CLEAR_REFS_MAPPED,
695 CLEAR_REFS_LAST,
696};
697
691static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, 698static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
692 unsigned long end, struct mm_walk *walk) 699 unsigned long end, struct mm_walk *walk)
693{ 700{
@@ -719,10 +726,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
719 return 0; 726 return 0;
720} 727}
721 728
722#define CLEAR_REFS_ALL 1
723#define CLEAR_REFS_ANON 2
724#define CLEAR_REFS_MAPPED 3
725
726static ssize_t clear_refs_write(struct file *file, const char __user *buf, 729static ssize_t clear_refs_write(struct file *file, const char __user *buf,
727 size_t count, loff_t *ppos) 730 size_t count, loff_t *ppos)
728{ 731{
@@ -730,7 +733,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
730 char buffer[PROC_NUMBUF]; 733 char buffer[PROC_NUMBUF];
731 struct mm_struct *mm; 734 struct mm_struct *mm;
732 struct vm_area_struct *vma; 735 struct vm_area_struct *vma;
733 int type; 736 enum clear_refs_types type;
737 int itype;
734 int rv; 738 int rv;
735 739
736 memset(buffer, 0, sizeof(buffer)); 740 memset(buffer, 0, sizeof(buffer));
@@ -738,10 +742,11 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
738 count = sizeof(buffer) - 1; 742 count = sizeof(buffer) - 1;
739 if (copy_from_user(buffer, buf, count)) 743 if (copy_from_user(buffer, buf, count))
740 return -EFAULT; 744 return -EFAULT;
741 rv = kstrtoint(strstrip(buffer), 10, &type); 745 rv = kstrtoint(strstrip(buffer), 10, &itype);
742 if (rv < 0) 746 if (rv < 0)
743 return rv; 747 return rv;
744 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 748 type = (enum clear_refs_types)itype;
749 if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
745 return -EINVAL; 750 return -EINVAL;
746 task = get_proc_task(file_inode(file)); 751 task = get_proc_task(file_inode(file));
747 if (!task) 752 if (!task)