diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 75 |
1 files changed, 33 insertions, 42 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 14589a228e97..44a8cefeae6e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -7,21 +7,26 @@ | |||
7 | * Free Software Foundation. | 7 | * Free Software Foundation. |
8 | * | 8 | * |
9 | * High level machine check handler. Handles pages reported by the | 9 | * High level machine check handler. Handles pages reported by the |
10 | * hardware as being corrupted usually due to a 2bit ECC memory or cache | 10 | * hardware as being corrupted usually due to a multi-bit ECC memory or cache |
11 | * failure. | 11 | * failure. |
12 | * | ||
13 | * In addition there is a "soft offline" entry point that allows us to stop | ||
14 | * using not-yet-corrupted-but-suspicious pages without killing anything. | ||
12 | * | 15 | * |
13 | * Handles page cache pages in various states. The tricky part | 16 | * Handles page cache pages in various states. The tricky part |
14 | * here is that we can access any page asynchronous to other VM | 17 | * here is that we can access any page asynchronously with respect to |
15 | * users, because memory failures could happen anytime and anywhere, | 18 | * other VM users, because memory failures could happen anytime and |
16 | * possibly violating some of their assumptions. This is why this code | 19 | * anywhere. This could violate some of their assumptions. This is why |
17 | * has to be extremely careful. Generally it tries to use normal locking | 20 | * this code has to be extremely careful. Generally it tries to use |
18 | * rules, as in get the standard locks, even if that means the | 21 | * normal locking rules, as in get the standard locks, even if that means |
19 | * error handling takes potentially a long time. | 22 | * the error handling takes potentially a long time. |
20 | * | 23 | * |
21 | * The operation to map back from RMAP chains to processes has to walk | 24 | * There are several operations here with exponential complexity because |
22 | * the complete process list and has non linear complexity with the number | 25 | * of unsuitable VM data structures. For example the operation to map back |
23 | * mappings. In short it can be quite slow. But since memory corruptions | 26 | * from RMAP chains to processes has to walk the complete process list and |
24 | * are rare we hope to get away with this. | 27 | * has non-linear complexity with the number of mappings. But since memory |
28 | * corruptions are rare we hope to get away with this. This avoids impacting | ||
29 | * the core VM. | ||
25 | */ | 30 | */ |
26 | 31 | ||
27 | /* | 32 | /* |
@@ -30,7 +35,6 @@ | |||
30 | * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages | 35 | * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages |
31 | * - pass bad pages to kdump next kernel | 36 | * - pass bad pages to kdump next kernel |
32 | */ | 37 | */ |
33 | #define DEBUG 1 /* remove me in 2.6.34 */ | ||
34 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
35 | #include <linux/mm.h> | 39 | #include <linux/mm.h> |
36 | #include <linux/page-flags.h> | 40 | #include <linux/page-flags.h> |
@@ -78,7 +82,7 @@ static int hwpoison_filter_dev(struct page *p) | |||
78 | return 0; | 82 | return 0; |
79 | 83 | ||
80 | /* | 84 | /* |
81 | * page_mapping() does not accept slab page | 85 | * page_mapping() does not accept slab pages. |
82 | */ | 86 | */ |
83 | if (PageSlab(p)) | 87 | if (PageSlab(p)) |
84 | return -EINVAL; | 88 | return -EINVAL; |
@@ -268,7 +272,7 @@ struct to_kill { | |||
268 | struct list_head nd; | 272 | struct list_head nd; |
269 | struct task_struct *tsk; | 273 | struct task_struct *tsk; |
270 | unsigned long addr; | 274 | unsigned long addr; |
271 | unsigned addr_valid:1; | 275 | char addr_valid; |
272 | }; | 276 | }; |
273 | 277 | ||
274 | /* | 278 | /* |
@@ -309,7 +313,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, | |||
309 | * a SIGKILL because the error is not contained anymore. | 313 | * a SIGKILL because the error is not contained anymore. |
310 | */ | 314 | */ |
311 | if (tk->addr == -EFAULT) { | 315 | if (tk->addr == -EFAULT) { |
312 | pr_debug("MCE: Unable to find user space address %lx in %s\n", | 316 | pr_info("MCE: Unable to find user space address %lx in %s\n", |
313 | page_to_pfn(p), tsk->comm); | 317 | page_to_pfn(p), tsk->comm); |
314 | tk->addr_valid = 0; | 318 | tk->addr_valid = 0; |
315 | } | 319 | } |
@@ -577,7 +581,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) | |||
577 | pfn, err); | 581 | pfn, err); |
578 | } else if (page_has_private(p) && | 582 | } else if (page_has_private(p) && |
579 | !try_to_release_page(p, GFP_NOIO)) { | 583 | !try_to_release_page(p, GFP_NOIO)) { |
580 | pr_debug("MCE %#lx: failed to release buffers\n", pfn); | 584 | pr_info("MCE %#lx: failed to release buffers\n", pfn); |
581 | } else { | 585 | } else { |
582 | ret = RECOVERED; | 586 | ret = RECOVERED; |
583 | } | 587 | } |
@@ -836,8 +840,6 @@ static int page_action(struct page_state *ps, struct page *p, | |||
836 | return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY; | 840 | return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY; |
837 | } | 841 | } |
838 | 842 | ||
839 | #define N_UNMAP_TRIES 5 | ||
840 | |||
841 | /* | 843 | /* |
842 | * Do all that is necessary to remove user space mappings. Unmap | 844 | * Do all that is necessary to remove user space mappings. Unmap |
843 | * the pages and send SIGBUS to the processes if the data was dirty. | 845 | * the pages and send SIGBUS to the processes if the data was dirty. |
@@ -849,7 +851,6 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
849 | struct address_space *mapping; | 851 | struct address_space *mapping; |
850 | LIST_HEAD(tokill); | 852 | LIST_HEAD(tokill); |
851 | int ret; | 853 | int ret; |
852 | int i; | ||
853 | int kill = 1; | 854 | int kill = 1; |
854 | struct page *hpage = compound_head(p); | 855 | struct page *hpage = compound_head(p); |
855 | 856 | ||
@@ -903,17 +904,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
903 | if (kill) | 904 | if (kill) |
904 | collect_procs(hpage, &tokill); | 905 | collect_procs(hpage, &tokill); |
905 | 906 | ||
906 | /* | 907 | ret = try_to_unmap(hpage, ttu); |
907 | * try_to_unmap can fail temporarily due to races. | ||
908 | * Try a few times (RED-PEN better strategy?) | ||
909 | */ | ||
910 | for (i = 0; i < N_UNMAP_TRIES; i++) { | ||
911 | ret = try_to_unmap(hpage, ttu); | ||
912 | if (ret == SWAP_SUCCESS) | ||
913 | break; | ||
914 | pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret); | ||
915 | } | ||
916 | |||
917 | if (ret != SWAP_SUCCESS) | 908 | if (ret != SWAP_SUCCESS) |
918 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", | 909 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", |
919 | pfn, page_mapcount(hpage)); | 910 | pfn, page_mapcount(hpage)); |
@@ -1168,7 +1159,7 @@ int unpoison_memory(unsigned long pfn) | |||
1168 | page = compound_head(p); | 1159 | page = compound_head(p); |
1169 | 1160 | ||
1170 | if (!PageHWPoison(p)) { | 1161 | if (!PageHWPoison(p)) { |
1171 | pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn); | 1162 | pr_info("MCE: Page was already unpoisoned %#lx\n", pfn); |
1172 | return 0; | 1163 | return 0; |
1173 | } | 1164 | } |
1174 | 1165 | ||
@@ -1187,7 +1178,7 @@ int unpoison_memory(unsigned long pfn) | |||
1187 | } | 1178 | } |
1188 | if (TestClearPageHWPoison(p)) | 1179 | if (TestClearPageHWPoison(p)) |
1189 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1180 | atomic_long_sub(nr_pages, &mce_bad_pages); |
1190 | pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn); | 1181 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); |
1191 | return 0; | 1182 | return 0; |
1192 | } | 1183 | } |
1193 | 1184 | ||
@@ -1199,7 +1190,7 @@ int unpoison_memory(unsigned long pfn) | |||
1199 | * the free buddy page pool. | 1190 | * the free buddy page pool. |
1200 | */ | 1191 | */ |
1201 | if (TestClearPageHWPoison(page)) { | 1192 | if (TestClearPageHWPoison(page)) { |
1202 | pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn); | 1193 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); |
1203 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1194 | atomic_long_sub(nr_pages, &mce_bad_pages); |
1204 | freeit = 1; | 1195 | freeit = 1; |
1205 | if (PageHuge(page)) | 1196 | if (PageHuge(page)) |
@@ -1256,15 +1247,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1256 | */ | 1247 | */ |
1257 | if (!get_page_unless_zero(compound_head(p))) { | 1248 | if (!get_page_unless_zero(compound_head(p))) { |
1258 | if (PageHuge(p)) { | 1249 | if (PageHuge(p)) { |
1259 | pr_debug("get_any_page: %#lx free huge page\n", pfn); | 1250 | pr_info("get_any_page: %#lx free huge page\n", pfn); |
1260 | ret = dequeue_hwpoisoned_huge_page(compound_head(p)); | 1251 | ret = dequeue_hwpoisoned_huge_page(compound_head(p)); |
1261 | } else if (is_free_buddy_page(p)) { | 1252 | } else if (is_free_buddy_page(p)) { |
1262 | pr_debug("get_any_page: %#lx free buddy page\n", pfn); | 1253 | pr_info("get_any_page: %#lx free buddy page\n", pfn); |
1263 | /* Set hwpoison bit while page is still isolated */ | 1254 | /* Set hwpoison bit while page is still isolated */ |
1264 | SetPageHWPoison(p); | 1255 | SetPageHWPoison(p); |
1265 | ret = 0; | 1256 | ret = 0; |
1266 | } else { | 1257 | } else { |
1267 | pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n", | 1258 | pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n", |
1268 | pfn, p->flags); | 1259 | pfn, p->flags); |
1269 | ret = -EIO; | 1260 | ret = -EIO; |
1270 | } | 1261 | } |
@@ -1372,7 +1363,7 @@ int soft_offline_page(struct page *page, int flags) | |||
1372 | goto done; | 1363 | goto done; |
1373 | } | 1364 | } |
1374 | if (!PageLRU(page)) { | 1365 | if (!PageLRU(page)) { |
1375 | pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n", | 1366 | pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", |
1376 | pfn, page->flags); | 1367 | pfn, page->flags); |
1377 | return -EIO; | 1368 | return -EIO; |
1378 | } | 1369 | } |
@@ -1386,7 +1377,7 @@ int soft_offline_page(struct page *page, int flags) | |||
1386 | if (PageHWPoison(page)) { | 1377 | if (PageHWPoison(page)) { |
1387 | unlock_page(page); | 1378 | unlock_page(page); |
1388 | put_page(page); | 1379 | put_page(page); |
1389 | pr_debug("soft offline: %#lx page already poisoned\n", pfn); | 1380 | pr_info("soft offline: %#lx page already poisoned\n", pfn); |
1390 | return -EBUSY; | 1381 | return -EBUSY; |
1391 | } | 1382 | } |
1392 | 1383 | ||
@@ -1407,7 +1398,7 @@ int soft_offline_page(struct page *page, int flags) | |||
1407 | put_page(page); | 1398 | put_page(page); |
1408 | if (ret == 1) { | 1399 | if (ret == 1) { |
1409 | ret = 0; | 1400 | ret = 0; |
1410 | pr_debug("soft_offline: %#lx: invalidated\n", pfn); | 1401 | pr_info("soft_offline: %#lx: invalidated\n", pfn); |
1411 | goto done; | 1402 | goto done; |
1412 | } | 1403 | } |
1413 | 1404 | ||
@@ -1423,13 +1414,13 @@ int soft_offline_page(struct page *page, int flags) | |||
1423 | list_add(&page->lru, &pagelist); | 1414 | list_add(&page->lru, &pagelist); |
1424 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); | 1415 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); |
1425 | if (ret) { | 1416 | if (ret) { |
1426 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", | 1417 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", |
1427 | pfn, ret, page->flags); | 1418 | pfn, ret, page->flags); |
1428 | if (ret > 0) | 1419 | if (ret > 0) |
1429 | ret = -EIO; | 1420 | ret = -EIO; |
1430 | } | 1421 | } |
1431 | } else { | 1422 | } else { |
1432 | pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", | 1423 | pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", |
1433 | pfn, ret, page_count(page), page->flags); | 1424 | pfn, ret, page_count(page), page->flags); |
1434 | } | 1425 | } |
1435 | if (ret) | 1426 | if (ret) |