diff options
author | Wu Fengguang <fengguang.wu@intel.com> | 2009-12-16 06:19:59 -0500 |
---|---|---|
committer | Andi Kleen <ak@linux.intel.com> | 2009-12-16 06:19:59 -0500 |
commit | 7c116f2b0dbac4a1dd051c7a5e8cef37701cafd4 (patch) | |
tree | ac7f1e56551df46bc79e400a182a57f4eae5ddaf | |
parent | 138ce286eb6ee6d39ca4fb50516e93adaf6b605f (diff) |
HWPOISON: add fs/device filters
Filesystem data/metadata pages are the trickiest pages to isolate.
Getting them right requires careful code review and stress testing.
The fs/device filter helps target the stress tests at the pages of a
specific filesystem. The filter condition is the block device's major/minor
numbers:
- corrupt-filter-dev-major
- corrupt-filter-dev-minor
When a filter is specified (i.e. set to anything other than -1), only page
cache pages that belong to that device will be poisoned.
The filters are checked reliably on the locked and refcounted page.
Haicheng: clear PG_hwpoison and drop bad page count if filter not OK
AK: Add documentation
CC: Haicheng Li <haicheng.li@intel.com>
CC: Nick Piggin <npiggin@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
-rw-r--r-- | Documentation/vm/hwpoison.txt | 7 | ||||
-rw-r--r-- | mm/hwpoison-inject.c | 11 | ||||
-rw-r--r-- | mm/internal.h | 3 | ||||
-rw-r--r-- | mm/memory-failure.c | 51 |
4 files changed, 72 insertions, 0 deletions
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt index f047e75acb23..fdf580464324 100644 --- a/Documentation/vm/hwpoison.txt +++ b/Documentation/vm/hwpoison.txt | |||
@@ -115,6 +115,13 @@ memory failures. | |||
115 | Note these injection interfaces are not stable and might change between | 115 | Note these injection interfaces are not stable and might change between |
116 | kernel versions | 116 | kernel versions |
117 | 117 | ||
118 | corrupt-filter-dev-major | ||
119 | corrupt-filter-dev-minor | ||
120 | |||
121 | Only handle memory failures to pages associated with the file system defined | ||
122 | by block device major/minor. -1U is the wildcard value. | ||
123 | This should only be used for testing with artificial injection. | ||
124 | |||
118 | Architecture specific MCE injector | 125 | Architecture specific MCE injector |
119 | 126 | ||
120 | x86 has mce-inject, mce-test | 127 | x86 has mce-inject, mce-test |
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 6e35e563bf50..ac692a9b766c 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/debugfs.h> | 3 | #include <linux/debugfs.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | #include "internal.h" | ||
6 | 7 | ||
7 | static struct dentry *hwpoison_dir; | 8 | static struct dentry *hwpoison_dir; |
8 | 9 | ||
@@ -54,6 +55,16 @@ static int pfn_inject_init(void) | |||
54 | if (!dentry) | 55 | if (!dentry) |
55 | goto fail; | 56 | goto fail; |
56 | 57 | ||
58 | dentry = debugfs_create_u32("corrupt-filter-dev-major", 0600, | ||
59 | hwpoison_dir, &hwpoison_filter_dev_major); | ||
60 | if (!dentry) | ||
61 | goto fail; | ||
62 | |||
63 | dentry = debugfs_create_u32("corrupt-filter-dev-minor", 0600, | ||
64 | hwpoison_dir, &hwpoison_filter_dev_minor); | ||
65 | if (!dentry) | ||
66 | goto fail; | ||
67 | |||
57 | return 0; | 68 | return 0; |
58 | fail: | 69 | fail: |
59 | pfn_inject_exit(); | 70 | pfn_inject_exit(); |
diff --git a/mm/internal.h b/mm/internal.h index 49b2ff776b78..814da335f050 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -250,3 +250,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
250 | #define ZONE_RECLAIM_SOME 0 | 250 | #define ZONE_RECLAIM_SOME 0 |
251 | #define ZONE_RECLAIM_SUCCESS 1 | 251 | #define ZONE_RECLAIM_SUCCESS 1 |
252 | #endif | 252 | #endif |
253 | |||
254 | extern u32 hwpoison_filter_dev_major; | ||
255 | extern u32 hwpoison_filter_dev_minor; | ||
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index edeaf2319e74..82ac73436d0e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -48,6 +48,50 @@ int sysctl_memory_failure_recovery __read_mostly = 1; | |||
48 | 48 | ||
49 | atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); | 49 | atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); |
50 | 50 | ||
51 | u32 hwpoison_filter_dev_major = ~0U; | ||
52 | u32 hwpoison_filter_dev_minor = ~0U; | ||
53 | EXPORT_SYMBOL_GPL(hwpoison_filter_dev_major); | ||
54 | EXPORT_SYMBOL_GPL(hwpoison_filter_dev_minor); | ||
55 | |||
56 | static int hwpoison_filter_dev(struct page *p) | ||
57 | { | ||
58 | struct address_space *mapping; | ||
59 | dev_t dev; | ||
60 | |||
61 | if (hwpoison_filter_dev_major == ~0U && | ||
62 | hwpoison_filter_dev_minor == ~0U) | ||
63 | return 0; | ||
64 | |||
65 | /* | ||
66 | * page_mapping() does not accept slab page | ||
67 | */ | ||
68 | if (PageSlab(p)) | ||
69 | return -EINVAL; | ||
70 | |||
71 | mapping = page_mapping(p); | ||
72 | if (mapping == NULL || mapping->host == NULL) | ||
73 | return -EINVAL; | ||
74 | |||
75 | dev = mapping->host->i_sb->s_dev; | ||
76 | if (hwpoison_filter_dev_major != ~0U && | ||
77 | hwpoison_filter_dev_major != MAJOR(dev)) | ||
78 | return -EINVAL; | ||
79 | if (hwpoison_filter_dev_minor != ~0U && | ||
80 | hwpoison_filter_dev_minor != MINOR(dev)) | ||
81 | return -EINVAL; | ||
82 | |||
83 | return 0; | ||
84 | } | ||
85 | |||
/*
 * hwpoison_filter - check a page against the hwpoison test filters
 * @p: page under consideration
 *
 * Returns 0 when hwpoison_filter_dev() accepts the page and -EINVAL when it
 * does not.
 */
int hwpoison_filter(struct page *p)
{
	return hwpoison_filter_dev(p) ? -EINVAL : 0;
}
EXPORT_SYMBOL_GPL(hwpoison_filter);
94 | |||
51 | /* | 95 | /* |
52 | * Send all the processes who have the page mapped an ``action optional'' | 96 | * Send all the processes who have the page mapped an ``action optional'' |
53 | * signal. | 97 | * signal. |
@@ -843,6 +887,13 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
843 | res = 0; | 887 | res = 0; |
844 | goto out; | 888 | goto out; |
845 | } | 889 | } |
890 | if (hwpoison_filter(p)) { | ||
891 | if (TestClearPageHWPoison(p)) | ||
892 | atomic_long_dec(&mce_bad_pages); | ||
893 | unlock_page(p); | ||
894 | put_page(p); | ||
895 | return 0; | ||
896 | } | ||
846 | 897 | ||
847 | wait_on_page_writeback(p); | 898 | wait_on_page_writeback(p); |
848 | 899 | ||