diff options
-rw-r--r-- | mm/memory-failure.c | 73 |
1 files changed, 32 insertions, 41 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 757f6b0accfe..2044fe8920c2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -7,21 +7,26 @@ | |||
7 | * Free Software Foundation. | 7 | * Free Software Foundation. |
8 | * | 8 | * |
9 | * High level machine check handler. Handles pages reported by the | 9 | * High level machine check handler. Handles pages reported by the |
10 | * hardware as being corrupted usually due to a 2bit ECC memory or cache | 10 | * hardware as being corrupted usually due to a multi-bit ECC memory or cache |
11 | * failure. | 11 | * failure. |
12 | * | ||
13 | * In addition there is a "soft offline" entry point that allows stop using | ||
14 | * not-yet-corrupted-by-suspicious pages without killing anything. | ||
12 | * | 15 | * |
13 | * Handles page cache pages in various states. The tricky part | 16 | * Handles page cache pages in various states. The tricky part |
14 | * here is that we can access any page asynchronous to other VM | 17 | * here is that we can access any page asynchronously in respect to |
15 | * users, because memory failures could happen anytime and anywhere, | 18 | * other VM users, because memory failures could happen anytime and |
16 | * possibly violating some of their assumptions. This is why this code | 19 | * anywhere. This could violate some of their assumptions. This is why |
17 | * has to be extremely careful. Generally it tries to use normal locking | 20 | * this code has to be extremely careful. Generally it tries to use |
18 | * rules, as in get the standard locks, even if that means the | 21 | * normal locking rules, as in get the standard locks, even if that means |
19 | * error handling takes potentially a long time. | 22 | * the error handling takes potentially a long time. |
20 | * | 23 | * |
21 | * The operation to map back from RMAP chains to processes has to walk | 24 | * There are several operations here with exponential complexity because |
22 | * the complete process list and has non linear complexity with the number | 25 | * of unsuitable VM data structures. For example the operation to map back |
23 | * mappings. In short it can be quite slow. But since memory corruptions | 26 | * from RMAP chains to processes has to walk the complete process list and |
24 | * are rare we hope to get away with this. | 27 | * has non linear complexity with the number. But since memory corruptions |
28 | * are rare we hope to get away with this. This avoids impacting the core | ||
29 | * VM. | ||
25 | */ | 30 | */ |
26 | 31 | ||
27 | /* | 32 | /* |
@@ -30,7 +35,6 @@ | |||
30 | * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages | 35 | * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages |
31 | * - pass bad pages to kdump next kernel | 36 | * - pass bad pages to kdump next kernel |
32 | */ | 37 | */ |
33 | #define DEBUG 1 /* remove me in 2.6.34 */ | ||
34 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
35 | #include <linux/mm.h> | 39 | #include <linux/mm.h> |
36 | #include <linux/page-flags.h> | 40 | #include <linux/page-flags.h> |
@@ -78,7 +82,7 @@ static int hwpoison_filter_dev(struct page *p) | |||
78 | return 0; | 82 | return 0; |
79 | 83 | ||
80 | /* | 84 | /* |
81 | * page_mapping() does not accept slab page | 85 | * page_mapping() does not accept slab pages. |
82 | */ | 86 | */ |
83 | if (PageSlab(p)) | 87 | if (PageSlab(p)) |
84 | return -EINVAL; | 88 | return -EINVAL; |
@@ -268,7 +272,7 @@ struct to_kill { | |||
268 | struct list_head nd; | 272 | struct list_head nd; |
269 | struct task_struct *tsk; | 273 | struct task_struct *tsk; |
270 | unsigned long addr; | 274 | unsigned long addr; |
271 | unsigned addr_valid:1; | 275 | char addr_valid; |
272 | }; | 276 | }; |
273 | 277 | ||
274 | /* | 278 | /* |
@@ -309,7 +313,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, | |||
309 | * a SIGKILL because the error is not contained anymore. | 313 | * a SIGKILL because the error is not contained anymore. |
310 | */ | 314 | */ |
311 | if (tk->addr == -EFAULT) { | 315 | if (tk->addr == -EFAULT) { |
312 | pr_debug("MCE: Unable to find user space address %lx in %s\n", | 316 | pr_info("MCE: Unable to find user space address %lx in %s\n", |
313 | page_to_pfn(p), tsk->comm); | 317 | page_to_pfn(p), tsk->comm); |
314 | tk->addr_valid = 0; | 318 | tk->addr_valid = 0; |
315 | } | 319 | } |
@@ -577,7 +581,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) | |||
577 | pfn, err); | 581 | pfn, err); |
578 | } else if (page_has_private(p) && | 582 | } else if (page_has_private(p) && |
579 | !try_to_release_page(p, GFP_NOIO)) { | 583 | !try_to_release_page(p, GFP_NOIO)) { |
580 | pr_debug("MCE %#lx: failed to release buffers\n", pfn); | 584 | pr_info("MCE %#lx: failed to release buffers\n", pfn); |
581 | } else { | 585 | } else { |
582 | ret = RECOVERED; | 586 | ret = RECOVERED; |
583 | } | 587 | } |
@@ -836,8 +840,6 @@ static int page_action(struct page_state *ps, struct page *p, | |||
836 | return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY; | 840 | return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY; |
837 | } | 841 | } |
838 | 842 | ||
839 | #define N_UNMAP_TRIES 5 | ||
840 | |||
841 | /* | 843 | /* |
842 | * Do all that is necessary to remove user space mappings. Unmap | 844 | * Do all that is necessary to remove user space mappings. Unmap |
843 | * the pages and send SIGBUS to the processes if the data was dirty. | 845 | * the pages and send SIGBUS to the processes if the data was dirty. |
@@ -849,7 +851,6 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
849 | struct address_space *mapping; | 851 | struct address_space *mapping; |
850 | LIST_HEAD(tokill); | 852 | LIST_HEAD(tokill); |
851 | int ret; | 853 | int ret; |
852 | int i; | ||
853 | int kill = 1; | 854 | int kill = 1; |
854 | struct page *hpage = compound_head(p); | 855 | struct page *hpage = compound_head(p); |
855 | 856 | ||
@@ -903,17 +904,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
903 | if (kill) | 904 | if (kill) |
904 | collect_procs(hpage, &tokill); | 905 | collect_procs(hpage, &tokill); |
905 | 906 | ||
906 | /* | 907 | ret = try_to_unmap(hpage, ttu); |
907 | * try_to_unmap can fail temporarily due to races. | ||
908 | * Try a few times (RED-PEN better strategy?) | ||
909 | */ | ||
910 | for (i = 0; i < N_UNMAP_TRIES; i++) { | ||
911 | ret = try_to_unmap(hpage, ttu); | ||
912 | if (ret == SWAP_SUCCESS) | ||
913 | break; | ||
914 | pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret); | ||
915 | } | ||
916 | |||
917 | if (ret != SWAP_SUCCESS) | 908 | if (ret != SWAP_SUCCESS) |
918 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", | 909 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", |
919 | pfn, page_mapcount(hpage)); | 910 | pfn, page_mapcount(hpage)); |
@@ -1147,7 +1138,7 @@ int unpoison_memory(unsigned long pfn) | |||
1147 | page = compound_head(p); | 1138 | page = compound_head(p); |
1148 | 1139 | ||
1149 | if (!PageHWPoison(p)) { | 1140 | if (!PageHWPoison(p)) { |
1150 | pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn); | 1141 | pr_info("MCE: Page was already unpoisoned %#lx\n", pfn); |
1151 | return 0; | 1142 | return 0; |
1152 | } | 1143 | } |
1153 | 1144 | ||
@@ -1156,7 +1147,7 @@ int unpoison_memory(unsigned long pfn) | |||
1156 | if (!get_page_unless_zero(page)) { | 1147 | if (!get_page_unless_zero(page)) { |
1157 | if (TestClearPageHWPoison(p)) | 1148 | if (TestClearPageHWPoison(p)) |
1158 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1149 | atomic_long_sub(nr_pages, &mce_bad_pages); |
1159 | pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn); | 1150 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); |
1160 | return 0; | 1151 | return 0; |
1161 | } | 1152 | } |
1162 | 1153 | ||
@@ -1168,7 +1159,7 @@ int unpoison_memory(unsigned long pfn) | |||
1168 | * the free buddy page pool. | 1159 | * the free buddy page pool. |
1169 | */ | 1160 | */ |
1170 | if (TestClearPageHWPoison(page)) { | 1161 | if (TestClearPageHWPoison(page)) { |
1171 | pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn); | 1162 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); |
1172 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1163 | atomic_long_sub(nr_pages, &mce_bad_pages); |
1173 | freeit = 1; | 1164 | freeit = 1; |
1174 | } | 1165 | } |
@@ -1217,12 +1208,12 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1217 | set_migratetype_isolate(p); | 1208 | set_migratetype_isolate(p); |
1218 | if (!get_page_unless_zero(compound_head(p))) { | 1209 | if (!get_page_unless_zero(compound_head(p))) { |
1219 | if (is_free_buddy_page(p)) { | 1210 | if (is_free_buddy_page(p)) { |
1220 | pr_debug("get_any_page: %#lx free buddy page\n", pfn); | 1211 | pr_info("get_any_page: %#lx free buddy page\n", pfn); |
1221 | /* Set hwpoison bit while page is still isolated */ | 1212 | /* Set hwpoison bit while page is still isolated */ |
1222 | SetPageHWPoison(p); | 1213 | SetPageHWPoison(p); |
1223 | ret = 0; | 1214 | ret = 0; |
1224 | } else { | 1215 | } else { |
1225 | pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n", | 1216 | pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n", |
1226 | pfn, p->flags); | 1217 | pfn, p->flags); |
1227 | ret = -EIO; | 1218 | ret = -EIO; |
1228 | } | 1219 | } |
@@ -1288,7 +1279,7 @@ int soft_offline_page(struct page *page, int flags) | |||
1288 | goto done; | 1279 | goto done; |
1289 | } | 1280 | } |
1290 | if (!PageLRU(page)) { | 1281 | if (!PageLRU(page)) { |
1291 | pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n", | 1282 | pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", |
1292 | pfn, page->flags); | 1283 | pfn, page->flags); |
1293 | return -EIO; | 1284 | return -EIO; |
1294 | } | 1285 | } |
@@ -1302,7 +1293,7 @@ int soft_offline_page(struct page *page, int flags) | |||
1302 | if (PageHWPoison(page)) { | 1293 | if (PageHWPoison(page)) { |
1303 | unlock_page(page); | 1294 | unlock_page(page); |
1304 | put_page(page); | 1295 | put_page(page); |
1305 | pr_debug("soft offline: %#lx page already poisoned\n", pfn); | 1296 | pr_info("soft offline: %#lx page already poisoned\n", pfn); |
1306 | return -EBUSY; | 1297 | return -EBUSY; |
1307 | } | 1298 | } |
1308 | 1299 | ||
@@ -1323,7 +1314,7 @@ int soft_offline_page(struct page *page, int flags) | |||
1323 | put_page(page); | 1314 | put_page(page); |
1324 | if (ret == 1) { | 1315 | if (ret == 1) { |
1325 | ret = 0; | 1316 | ret = 0; |
1326 | pr_debug("soft_offline: %#lx: invalidated\n", pfn); | 1317 | pr_info("soft_offline: %#lx: invalidated\n", pfn); |
1327 | goto done; | 1318 | goto done; |
1328 | } | 1319 | } |
1329 | 1320 | ||
@@ -1339,13 +1330,13 @@ int soft_offline_page(struct page *page, int flags) | |||
1339 | list_add(&page->lru, &pagelist); | 1330 | list_add(&page->lru, &pagelist); |
1340 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); | 1331 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); |
1341 | if (ret) { | 1332 | if (ret) { |
1342 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", | 1333 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", |
1343 | pfn, ret, page->flags); | 1334 | pfn, ret, page->flags); |
1344 | if (ret > 0) | 1335 | if (ret > 0) |
1345 | ret = -EIO; | 1336 | ret = -EIO; |
1346 | } | 1337 | } |
1347 | } else { | 1338 | } else { |
1348 | pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", | 1339 | pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", |
1349 | pfn, ret, page_count(page), page->flags); | 1340 | pfn, ret, page_count(page), page->flags); |
1350 | } | 1341 | } |
1351 | if (ret) | 1342 | if (ret) |