diff options
author | Andi Kleen <andi@firstfloor.org> | 2009-12-16 06:20:00 -0500 |
---|---|---|
committer | Andi Kleen <ak@linux.intel.com> | 2009-12-16 06:20:00 -0500 |
commit | facb6011f3993947283fa15d039dacb4ad140230 (patch) | |
tree | c317e401fa7c867e1652879627163331f43085ef /drivers/base | |
parent | 2326c467df4ff814dc07cf1bdaa1e6e0a9c9f21c (diff) |
HWPOISON: Add soft page offline support
This is a simpler, gentler variant of memory_failure() for soft page
offlining controlled from user space. It doesn't kill anything; it just
tries to invalidate the page and, if that doesn't work, migrates the
page away.
This is useful for predictive failure analysis, where a page has
a high rate of corrected errors, but hasn't gone bad yet. Instead
it can be offlined early and avoided.
The offlining is controlled from sysfs, including a new generic
entry point for hard page offlining for symmetry too.
We use the page isolate facility to prevent a re-allocation
race. Normally this is only used by memory hotplug. To avoid
races with memory allocation I am using lock_system_sleep().
This avoids the situation where memory hotplug is about
to isolate a page range and then hwpoison undoes that work.
This is a big hammer, but it is currently the simplest
solution.
When the page is not free or LRU we try to free pages
from slab and other caches. The slab freeing is currently
quite dumb and does not try to focus on the specific slab
cache which might own the page. This could be potentially
improved later.
Thanks to Fengguang Wu and Haicheng Li for some fixes.
[Added fix from Andrew Morton to adapt to new migrate_pages prototype]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Diffstat (limited to 'drivers/base')
-rw-r--r-- | drivers/base/memory.c | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 989429cfed88..c4c8f2e1dd15 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c | |||
@@ -341,6 +341,64 @@ static inline int memory_probe_init(void) | |||
341 | } | 341 | } |
342 | #endif | 342 | #endif |
343 | 343 | ||
344 | #ifdef CONFIG_MEMORY_FAILURE | ||
345 | /* | ||
346 | * Support for offlining pages of memory | ||
347 | */ | ||
348 | |||
349 | /* Soft offline a page */ | ||
350 | static ssize_t | ||
351 | store_soft_offline_page(struct class *class, const char *buf, size_t count) | ||
352 | { | ||
353 | int ret; | ||
354 | u64 pfn; | ||
355 | if (!capable(CAP_SYS_ADMIN)) | ||
356 | return -EPERM; | ||
357 | if (strict_strtoull(buf, 0, &pfn) < 0) | ||
358 | return -EINVAL; | ||
359 | pfn >>= PAGE_SHIFT; | ||
360 | if (!pfn_valid(pfn)) | ||
361 | return -ENXIO; | ||
362 | ret = soft_offline_page(pfn_to_page(pfn), 0); | ||
363 | return ret == 0 ? count : ret; | ||
364 | } | ||
365 | |||
366 | /* Forcibly offline a page, including killing processes. */ | ||
367 | static ssize_t | ||
368 | store_hard_offline_page(struct class *class, const char *buf, size_t count) | ||
369 | { | ||
370 | int ret; | ||
371 | u64 pfn; | ||
372 | if (!capable(CAP_SYS_ADMIN)) | ||
373 | return -EPERM; | ||
374 | if (strict_strtoull(buf, 0, &pfn) < 0) | ||
375 | return -EINVAL; | ||
376 | pfn >>= PAGE_SHIFT; | ||
377 | ret = __memory_failure(pfn, 0, 0); | ||
378 | return ret ? ret : count; | ||
379 | } | ||
380 | |||
381 | static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); | ||
382 | static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); | ||
383 | |||
384 | static __init int memory_fail_init(void) | ||
385 | { | ||
386 | int err; | ||
387 | |||
388 | err = sysfs_create_file(&memory_sysdev_class.kset.kobj, | ||
389 | &class_attr_soft_offline_page.attr); | ||
390 | if (!err) | ||
391 | err = sysfs_create_file(&memory_sysdev_class.kset.kobj, | ||
392 | &class_attr_hard_offline_page.attr); | ||
393 | return err; | ||
394 | } | ||
395 | #else | ||
/*
 * Stub used when CONFIG_MEMORY_FAILURE is not defined: there are no
 * offline control files to create, so initialisation always succeeds.
 */
static inline int memory_fail_init(void)
{
	return 0;
}
400 | #endif | ||
401 | |||
344 | /* | 402 | /* |
345 | * Note that phys_device is optional. It is here to allow for | 403 | * Note that phys_device is optional. It is here to allow for |
346 | * differentiation between which *physical* devices each | 404 | * differentiation between which *physical* devices each |
@@ -473,6 +531,9 @@ int __init memory_dev_init(void) | |||
473 | err = memory_probe_init(); | 531 | err = memory_probe_init(); |
474 | if (!ret) | 532 | if (!ret) |
475 | ret = err; | 533 | ret = err; |
534 | err = memory_fail_init(); | ||
535 | if (!ret) | ||
536 | ret = err; | ||
476 | err = block_size_init(); | 537 | err = block_size_init(); |
477 | if (!ret) | 538 | if (!ret) |
478 | ret = err; | 539 | ret = err; |