diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 118 |
1 file changed, 86 insertions, 32 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 124324134ff..0207c2f6f8b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <linux/swapops.h> | 52 | #include <linux/swapops.h> |
53 | #include <linux/hugetlb.h> | 53 | #include <linux/hugetlb.h> |
54 | #include <linux/memory_hotplug.h> | ||
54 | #include "internal.h" | 55 | #include "internal.h" |
55 | 56 | ||
56 | int sysctl_memory_failure_early_kill __read_mostly = 0; | 57 | int sysctl_memory_failure_early_kill __read_mostly = 0; |
@@ -202,7 +203,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, | |||
202 | #ifdef __ARCH_SI_TRAPNO | 203 | #ifdef __ARCH_SI_TRAPNO |
203 | si.si_trapno = trapno; | 204 | si.si_trapno = trapno; |
204 | #endif | 205 | #endif |
205 | si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; | 206 | si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; |
206 | /* | 207 | /* |
207 | * Don't use force here, it's convenient if the signal | 208 | * Don't use force here, it's convenient if the signal |
208 | * can be temporarily blocked. | 209 | * can be temporarily blocked. |
@@ -232,8 +233,8 @@ void shake_page(struct page *p, int access) | |||
232 | } | 233 | } |
233 | 234 | ||
234 | /* | 235 | /* |
235 | * Only all shrink_slab here (which would also | 236 | * Only call shrink_slab here (which would also shrink other caches) if |
236 | * shrink other caches) if access is not potentially fatal. | 237 | * access is not potentially fatal. |
237 | */ | 238 | */ |
238 | if (access) { | 239 | if (access) { |
239 | int nr; | 240 | int nr; |
@@ -853,6 +854,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
853 | int ret; | 854 | int ret; |
854 | int kill = 1; | 855 | int kill = 1; |
855 | struct page *hpage = compound_head(p); | 856 | struct page *hpage = compound_head(p); |
857 | struct page *ppage; | ||
856 | 858 | ||
857 | if (PageReserved(p) || PageSlab(p)) | 859 | if (PageReserved(p) || PageSlab(p)) |
858 | return SWAP_SUCCESS; | 860 | return SWAP_SUCCESS; |
@@ -894,6 +896,44 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
894 | } | 896 | } |
895 | 897 | ||
896 | /* | 898 | /* |
899 | * ppage: poisoned page | ||
900 | * if p is a regular page (4k page) | ||
901 | * ppage == real poisoned page; | ||
902 | * else p is hugetlb or THP, ppage == head page. | ||
903 | */ | ||
904 | ppage = hpage; | ||
905 | |||
906 | if (PageTransHuge(hpage)) { | ||
907 | /* | ||
908 | * Verify that this isn't a hugetlbfs head page, the check for | ||
909 | * PageAnon is just to avoid tripping a split_huge_page | ||
910 | * internal debug check, as split_huge_page refuses to deal with | ||
911 | * anything that isn't an anon page. PageAnon can't go away from | ||
912 | * under us because we hold a refcount on the hpage. Without a | ||
913 | * refcount on the hpage. split_huge_page can't be safely called | ||
914 | * in the first place, having a refcount on the tail isn't | ||
915 | * enough to be safe. | ||
916 | */ | ||
917 | if (!PageHuge(hpage) && PageAnon(hpage)) { | ||
918 | if (unlikely(split_huge_page(hpage))) { | ||
919 | /* | ||
920 | * FIXME: if splitting THP is failed, it is | ||
921 | * better to stop the following operation rather | ||
922 | * than causing panic by unmapping. System might | ||
923 | * survive if the page is freed later. | ||
924 | */ | ||
925 | printk(KERN_INFO | ||
926 | "MCE %#lx: failed to split THP\n", pfn); | ||
927 | |||
928 | BUG_ON(!PageHWPoison(p)); | ||
929 | return SWAP_FAIL; | ||
930 | } | ||
931 | /* THP is split, so ppage should be the real poisoned page. */ | ||
932 | ppage = p; | ||
933 | } | ||
934 | } | ||
935 | |||
936 | /* | ||
897 | * First collect all the processes that have the page | 937 | * First collect all the processes that have the page |
898 | * mapped in dirty form. This has to be done before try_to_unmap, | 938 | * mapped in dirty form. This has to be done before try_to_unmap, |
899 | * because ttu takes the rmap data structures down. | 939 | * because ttu takes the rmap data structures down. |
@@ -902,12 +942,18 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
902 | * there's nothing that can be done. | 942 | * there's nothing that can be done. |
903 | */ | 943 | */ |
904 | if (kill) | 944 | if (kill) |
905 | collect_procs(hpage, &tokill); | 945 | collect_procs(ppage, &tokill); |
906 | 946 | ||
907 | ret = try_to_unmap(hpage, ttu); | 947 | if (hpage != ppage) |
948 | lock_page_nosync(ppage); | ||
949 | |||
950 | ret = try_to_unmap(ppage, ttu); | ||
908 | if (ret != SWAP_SUCCESS) | 951 | if (ret != SWAP_SUCCESS) |
909 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", | 952 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", |
910 | pfn, page_mapcount(hpage)); | 953 | pfn, page_mapcount(ppage)); |
954 | |||
955 | if (hpage != ppage) | ||
956 | unlock_page(ppage); | ||
911 | 957 | ||
912 | /* | 958 | /* |
913 | * Now that the dirty bit has been propagated to the | 959 | * Now that the dirty bit has been propagated to the |
@@ -918,7 +964,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
918 | * use a more force-full uncatchable kill to prevent | 964 | * use a more force-full uncatchable kill to prevent |
919 | * any accesses to the poisoned memory. | 965 | * any accesses to the poisoned memory. |
920 | */ | 966 | */ |
921 | kill_procs_ao(&tokill, !!PageDirty(hpage), trapno, | 967 | kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, |
922 | ret != SWAP_SUCCESS, p, pfn); | 968 | ret != SWAP_SUCCESS, p, pfn); |
923 | 969 | ||
924 | return ret; | 970 | return ret; |
@@ -927,7 +973,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
927 | static void set_page_hwpoison_huge_page(struct page *hpage) | 973 | static void set_page_hwpoison_huge_page(struct page *hpage) |
928 | { | 974 | { |
929 | int i; | 975 | int i; |
930 | int nr_pages = 1 << compound_order(hpage); | 976 | int nr_pages = 1 << compound_trans_order(hpage); |
931 | for (i = 0; i < nr_pages; i++) | 977 | for (i = 0; i < nr_pages; i++) |
932 | SetPageHWPoison(hpage + i); | 978 | SetPageHWPoison(hpage + i); |
933 | } | 979 | } |
@@ -935,7 +981,7 @@ static void set_page_hwpoison_huge_page(struct page *hpage) | |||
935 | static void clear_page_hwpoison_huge_page(struct page *hpage) | 981 | static void clear_page_hwpoison_huge_page(struct page *hpage) |
936 | { | 982 | { |
937 | int i; | 983 | int i; |
938 | int nr_pages = 1 << compound_order(hpage); | 984 | int nr_pages = 1 << compound_trans_order(hpage); |
939 | for (i = 0; i < nr_pages; i++) | 985 | for (i = 0; i < nr_pages; i++) |
940 | ClearPageHWPoison(hpage + i); | 986 | ClearPageHWPoison(hpage + i); |
941 | } | 987 | } |
@@ -965,7 +1011,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
965 | return 0; | 1011 | return 0; |
966 | } | 1012 | } |
967 | 1013 | ||
968 | nr_pages = 1 << compound_order(hpage); | 1014 | nr_pages = 1 << compound_trans_order(hpage); |
969 | atomic_long_add(nr_pages, &mce_bad_pages); | 1015 | atomic_long_add(nr_pages, &mce_bad_pages); |
970 | 1016 | ||
971 | /* | 1017 | /* |
@@ -1019,19 +1065,22 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
1019 | * The check (unnecessarily) ignores LRU pages being isolated and | 1065 | * The check (unnecessarily) ignores LRU pages being isolated and |
1020 | * walked by the page reclaim code, however that's not a big loss. | 1066 | * walked by the page reclaim code, however that's not a big loss. |
1021 | */ | 1067 | */ |
1022 | if (!PageLRU(p) && !PageHuge(p)) | 1068 | if (!PageHuge(p) && !PageTransCompound(p)) { |
1023 | shake_page(p, 0); | 1069 | if (!PageLRU(p)) |
1024 | if (!PageLRU(p) && !PageHuge(p)) { | 1070 | shake_page(p, 0); |
1025 | /* | 1071 | if (!PageLRU(p)) { |
1026 | * shake_page could have turned it free. | 1072 | /* |
1027 | */ | 1073 | * shake_page could have turned it free. |
1028 | if (is_free_buddy_page(p)) { | 1074 | */ |
1029 | action_result(pfn, "free buddy, 2nd try", DELAYED); | 1075 | if (is_free_buddy_page(p)) { |
1030 | return 0; | 1076 | action_result(pfn, "free buddy, 2nd try", |
1077 | DELAYED); | ||
1078 | return 0; | ||
1079 | } | ||
1080 | action_result(pfn, "non LRU", IGNORED); | ||
1081 | put_page(p); | ||
1082 | return -EBUSY; | ||
1031 | } | 1083 | } |
1032 | action_result(pfn, "non LRU", IGNORED); | ||
1033 | put_page(p); | ||
1034 | return -EBUSY; | ||
1035 | } | 1084 | } |
1036 | 1085 | ||
1037 | /* | 1086 | /* |
@@ -1061,7 +1110,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
1061 | * For error on the tail page, we should set PG_hwpoison | 1110 | * For error on the tail page, we should set PG_hwpoison |
1062 | * on the head page to show that the hugepage is hwpoisoned | 1111 | * on the head page to show that the hugepage is hwpoisoned |
1063 | */ | 1112 | */ |
1064 | if (PageTail(p) && TestSetPageHWPoison(hpage)) { | 1113 | if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) { |
1065 | action_result(pfn, "hugepage already hardware poisoned", | 1114 | action_result(pfn, "hugepage already hardware poisoned", |
1066 | IGNORED); | 1115 | IGNORED); |
1067 | unlock_page(hpage); | 1116 | unlock_page(hpage); |
@@ -1163,7 +1212,7 @@ int unpoison_memory(unsigned long pfn) | |||
1163 | return 0; | 1212 | return 0; |
1164 | } | 1213 | } |
1165 | 1214 | ||
1166 | nr_pages = 1 << compound_order(page); | 1215 | nr_pages = 1 << compound_trans_order(page); |
1167 | 1216 | ||
1168 | if (!get_page_unless_zero(page)) { | 1217 | if (!get_page_unless_zero(page)) { |
1169 | /* | 1218 | /* |
@@ -1230,11 +1279,10 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1230 | return 1; | 1279 | return 1; |
1231 | 1280 | ||
1232 | /* | 1281 | /* |
1233 | * The lock_system_sleep prevents a race with memory hotplug, | 1282 | * The lock_memory_hotplug prevents a race with memory hotplug. |
1234 | * because the isolation assumes there's only a single user. | ||
1235 | * This is a big hammer, a better would be nicer. | 1283 | * This is a big hammer, a better would be nicer. |
1236 | */ | 1284 | */ |
1237 | lock_system_sleep(); | 1285 | lock_memory_hotplug(); |
1238 | 1286 | ||
1239 | /* | 1287 | /* |
1240 | * Isolate the page, so that it doesn't get reallocated if it | 1288 | * Isolate the page, so that it doesn't get reallocated if it |
@@ -1264,7 +1312,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1264 | ret = 1; | 1312 | ret = 1; |
1265 | } | 1313 | } |
1266 | unset_migratetype_isolate(p); | 1314 | unset_migratetype_isolate(p); |
1267 | unlock_system_sleep(); | 1315 | unlock_memory_hotplug(); |
1268 | return ret; | 1316 | return ret; |
1269 | } | 1317 | } |
1270 | 1318 | ||
@@ -1290,9 +1338,13 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1290 | /* Keep page count to indicate a given hugepage is isolated. */ | 1338 | /* Keep page count to indicate a given hugepage is isolated. */ |
1291 | 1339 | ||
1292 | list_add(&hpage->lru, &pagelist); | 1340 | list_add(&hpage->lru, &pagelist); |
1293 | ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); | 1341 | ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0, |
1342 | true); | ||
1294 | if (ret) { | 1343 | if (ret) { |
1295 | putback_lru_pages(&pagelist); | 1344 | struct page *page1, *page2; |
1345 | list_for_each_entry_safe(page1, page2, &pagelist, lru) | ||
1346 | put_page(page1); | ||
1347 | |||
1296 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", | 1348 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", |
1297 | pfn, ret, page->flags); | 1349 | pfn, ret, page->flags); |
1298 | if (ret > 0) | 1350 | if (ret > 0) |
@@ -1301,7 +1353,7 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1301 | } | 1353 | } |
1302 | done: | 1354 | done: |
1303 | if (!PageHWPoison(hpage)) | 1355 | if (!PageHWPoison(hpage)) |
1304 | atomic_long_add(1 << compound_order(hpage), &mce_bad_pages); | 1356 | atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages); |
1305 | set_page_hwpoison_huge_page(hpage); | 1357 | set_page_hwpoison_huge_page(hpage); |
1306 | dequeue_hwpoisoned_huge_page(hpage); | 1358 | dequeue_hwpoisoned_huge_page(hpage); |
1307 | /* keep elevated page count for bad page */ | 1359 | /* keep elevated page count for bad page */ |
@@ -1413,8 +1465,10 @@ int soft_offline_page(struct page *page, int flags) | |||
1413 | LIST_HEAD(pagelist); | 1465 | LIST_HEAD(pagelist); |
1414 | 1466 | ||
1415 | list_add(&page->lru, &pagelist); | 1467 | list_add(&page->lru, &pagelist); |
1416 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); | 1468 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, |
1469 | 0, true); | ||
1417 | if (ret) { | 1470 | if (ret) { |
1471 | putback_lru_pages(&pagelist); | ||
1418 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", | 1472 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", |
1419 | pfn, ret, page->flags); | 1473 | pfn, ret, page->flags); |
1420 | if (ret > 0) | 1474 | if (ret > 0) |