aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <andi@firstfloor.org>2009-12-16 06:19:59 -0500
committerAndi Kleen <ak@linux.intel.com>2009-12-16 06:19:59 -0500
commit4fd466eb46a6a917c317a87fb94bfc7252a0f7ed (patch)
tree003b28724241a22a41dc9ae067f30beadbf76e6a
parentd324236b3333e87c8825b35f2104184734020d35 (diff)
HWPOISON: add memory cgroup filter
The hwpoison test suite needs to inject hwpoison into a collection of selected task pages, and must not touch pages not owned by those tasks, since doing so could kill important system processes such as init. (But it's OK to mis-hwpoison free/unowned pages as well as shared clean pages. Mis-hwpoison of shared dirty pages will kill all tasks sharing them, so the test suite will target all or none of such tasks in the first place.) The memory cgroup serves this purpose well. We can put the target processes under the control of a memory cgroup, and tell the hwpoison injection code to only kill pages associated with some active memory cgroup. The prerequisite for doing hwpoison stress tests with mem_cgroup is that the mem_cgroup code tracks task pages _accurately_ (unless the page is locked), which we believe is/should be true. The benefit is simplification of the hwpoison injector code. Also, the mem_cgroup code will automatically be tested by the hwpoison test cases. The alternative interfaces pin-pfn/unpin-pfn can also delegate the (process and page flags) filtering functions reliably to user space. However, a prototype implementation shows that this scheme adds more complexity than we wanted. 
Example test case: mkdir /cgroup/hwpoison usemem -m 100 -s 1000 & echo `jobs -p` > /cgroup/hwpoison/tasks memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ') echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg page-types -p `pidof init` --hwpoison # shall do nothing page-types -p `pidof usemem` --hwpoison # poison its pages [AK: Fix documentation] [Add fix for problem noticed by Li Zefan <lizf@cn.fujitsu.com>; dentry in the css could be NULL] CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> CC: Hugh Dickins <hugh.dickins@tiscali.co.uk> CC: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> CC: Balbir Singh <balbir@linux.vnet.ibm.com> CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> CC: Li Zefan <lizf@cn.fujitsu.com> CC: Paul Menage <menage@google.com> CC: Nick Piggin <npiggin@suse.de> CC: Andi Kleen <andi@firstfloor.org> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Andi Kleen <ak@linux.intel.com>
-rw-r--r--Documentation/vm/hwpoison.txt16
-rw-r--r--mm/hwpoison-inject.c7
-rw-r--r--mm/internal.h1
-rw-r--r--mm/memory-failure.c46
4 files changed, 70 insertions, 0 deletions
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
index f454d3cd4d60..989e5afe740f 100644
--- a/Documentation/vm/hwpoison.txt
+++ b/Documentation/vm/hwpoison.txt
@@ -123,6 +123,22 @@ Only handle memory failures to pages associated with the file system defined
123by block device major/minor. -1U is the wildcard value. 123by block device major/minor. -1U is the wildcard value.
124This should be only used for testing with artificial injection. 124This should be only used for testing with artificial injection.
125 125
126corrupt-filter-memcg
127
128Limit injection to pages owned by a memory cgroup (memcg). The memcg is
129specified by its inode number.
130
131Example:
132 mkdir /cgroup/hwpoison
133
134 usemem -m 100 -s 1000 &
135 echo `jobs -p` > /cgroup/hwpoison/tasks
136
137 memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ')
138 echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
139
140 page-types -p `pidof init` --hwpoison # shall do nothing
141 page-types -p `pidof usemem` --hwpoison # poison its pages
126 142
127corrupt-filter-flags-mask 143corrupt-filter-flags-mask
128corrupt-filter-flags-value 144corrupt-filter-flags-value
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index c4dfd89f654a..c838735ac31d 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -112,6 +112,13 @@ static int pfn_inject_init(void)
112 if (!dentry) 112 if (!dentry)
113 goto fail; 113 goto fail;
114 114
115#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
116 dentry = debugfs_create_u64("corrupt-filter-memcg", 0600,
117 hwpoison_dir, &hwpoison_filter_memcg);
118 if (!dentry)
119 goto fail;
120#endif
121
115 return 0; 122 return 0;
116fail: 123fail:
117 pfn_inject_exit(); 124 pfn_inject_exit();
diff --git a/mm/internal.h b/mm/internal.h
index b2027c73119b..5a6761bea6a6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -257,3 +257,4 @@ extern u32 hwpoison_filter_dev_major;
257extern u32 hwpoison_filter_dev_minor; 257extern u32 hwpoison_filter_dev_minor;
258extern u64 hwpoison_filter_flags_mask; 258extern u64 hwpoison_filter_flags_mask;
259extern u64 hwpoison_filter_flags_value; 259extern u64 hwpoison_filter_flags_value;
260extern u64 hwpoison_filter_memcg;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 22d2b2028e54..117ef1598469 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -100,6 +100,49 @@ static int hwpoison_filter_flags(struct page *p)
100 return -EINVAL; 100 return -EINVAL;
101} 101}
102 102
/*
 * This allows stress tests to limit test scope to a collection of tasks
 * by putting them under some memcg. This prevents killing unrelated/important
 * processes such as /sbin/init. Note that the target task may share clean
 * pages with init (eg. libc text), which is harmless. If the target task
 * shares _dirty_ pages with another task B, the test scheme must make sure B
 * is also included in the memcg. Finally, due to race conditions this filter
 * can only guarantee that the page either belongs to the memcg tasks, or is
 * a freed page.
 *
 * hwpoison_filter_memcg holds the inode number of the memcg directory to
 * match; 0 disables the filter.
 *
 * Returns 0 if the filter is disabled or the page belongs to the selected
 * memcg, -EINVAL otherwise.
 */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
u64 hwpoison_filter_memcg;
EXPORT_SYMBOL_GPL(hwpoison_filter_memcg);
static int hwpoison_filter_task(struct page *p)
{
	struct mem_cgroup *mem;
	struct cgroup_subsys_state *css;
	unsigned long ino;

	/* No memcg selected: let every page through. */
	if (!hwpoison_filter_memcg)
		return 0;

	mem = try_get_mem_cgroup_from_page(p);
	if (!mem)
		return -EINVAL;

	css = mem_cgroup_css(mem);
	/* root_mem_cgroup has NULL dentries */
	if (!css->cgroup->dentry) {
		/*
		 * The css reference is released with css_put() on the
		 * normal path below; release it here too so this early
		 * exit does not leak it.
		 */
		css_put(css);
		return -EINVAL;
	}

	ino = css->cgroup->dentry->d_inode->i_ino;
	css_put(css);

	if (ino != hwpoison_filter_memcg)
		return -EINVAL;

	return 0;
}
#else
/* Without memcg support there is nothing to filter on: accept every page. */
static int hwpoison_filter_task(struct page *p) { return 0; }
#endif
145
103int hwpoison_filter(struct page *p) 146int hwpoison_filter(struct page *p)
104{ 147{
105 if (hwpoison_filter_dev(p)) 148 if (hwpoison_filter_dev(p))
@@ -108,6 +151,9 @@ int hwpoison_filter(struct page *p)
108 if (hwpoison_filter_flags(p)) 151 if (hwpoison_filter_flags(p))
109 return -EINVAL; 152 return -EINVAL;
110 153
154 if (hwpoison_filter_task(p))
155 return -EINVAL;
156
111 return 0; 157 return 0;
112} 158}
113EXPORT_SYMBOL_GPL(hwpoison_filter); 159EXPORT_SYMBOL_GPL(hwpoison_filter);