Diffstat (limited to 'mm/memory-failure.c'):
 mm/memory-failure.c | 118 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 86 insertions(+), 32 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 124324134ff..0207c2f6f8b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/swapops.h>
 #include <linux/hugetlb.h>
+#include <linux/memory_hotplug.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -202,7 +203,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
 #ifdef __ARCH_SI_TRAPNO
 	si.si_trapno = trapno;
 #endif
-	si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
+	si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
 	/*
 	 * Don't use force here, it's convenient if the signal
 	 * can be temporarily blocked.
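si_addr_lsb tells the SIGBUS handler how much memory around si_addr was lost, as log2 of the mapping size; switching to compound_trans_order() keeps that value sane when the page is a THP whose order can change under a concurrent split. An illustrative userspace consumer of the field, not part of the patch (assumes a glibc that exposes si_addr_lsb in siginfo_t):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

/* Values from the kernel ABI, in case the libc headers lack them. */
#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4
#endif
#ifndef BUS_MCEERR_AO
#define BUS_MCEERR_AO 5
#endif

static void hwpoison_handler(int sig, siginfo_t *si, void *uctx)
{
	(void)sig; (void)uctx;
	if (si->si_code == BUS_MCEERR_AO || si->si_code == BUS_MCEERR_AR) {
		/* si_addr_lsb = 12 for a 4K page, 21 for a 2M huge page. */
		size_t span = (size_t)1 << si->si_addr_lsb;
		/* fprintf for brevity; it is not async-signal-safe. */
		fprintf(stderr, "hwpoison at %p, %zu bytes unusable\n",
			si->si_addr, span);
	}
	_exit(1);
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = hwpoison_handler,
				.sa_flags = SA_SIGINFO };
	sigaction(SIGBUS, &sa, NULL);
	/* ... poison would be injected e.g. via madvise(MADV_HWPOISON) ... */
	return 0;
}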
@@ -232,8 +233,8 @@ void shake_page(struct page *p, int access)
232 } 233 }
233 234
234 /* 235 /*
235 * Only all shrink_slab here (which would also 236 * Only call shrink_slab here (which would also shrink other caches) if
236 * shrink other caches) if access is not potentially fatal. 237 * access is not potentially fatal.
237 */ 238 */
238 if (access) { 239 if (access) {
239 int nr; 240 int nr;
@@ -853,6 +854,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	int ret;
 	int kill = 1;
 	struct page *hpage = compound_head(p);
+	struct page *ppage;
 
 	if (PageReserved(p) || PageSlab(p))
 		return SWAP_SUCCESS;
@@ -894,6 +896,44 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	}
 
 	/*
+	 * ppage: poisoned page
+	 *   if p is a regular (4k) page,
+	 *     ppage == the real poisoned page;
+	 *   else p is hugetlb or THP, and ppage == the head page.
+	 */
+	ppage = hpage;
+
+	if (PageTransHuge(hpage)) {
+		/*
+		 * Verify that this isn't a hugetlbfs head page; the check for
+		 * PageAnon is just to avoid tripping a split_huge_page
+		 * internal debug check, as split_huge_page refuses to deal with
+		 * anything that isn't an anon page. PageAnon can't go away from
+		 * under us because we hold a refcount on the hpage; without a
+		 * refcount on the hpage, split_huge_page can't be safely called
+		 * in the first place, and having a refcount on the tail isn't
+		 * enough to be safe.
+		 */
+		if (!PageHuge(hpage) && PageAnon(hpage)) {
+			if (unlikely(split_huge_page(hpage))) {
+				/*
+				 * FIXME: if splitting the THP fails, it would
+				 * be better to stop the following operation
+				 * rather than cause a panic by unmapping. The
+				 * system might survive if the page is freed later.
+				 */
+				printk(KERN_INFO
+					"MCE %#lx: failed to split THP\n", pfn);
+
+				BUG_ON(!PageHWPoison(p));
+				return SWAP_FAIL;
+			}
+			/* THP is split, so ppage should be the real poisoned page. */
+			ppage = p;
+		}
+	}
+
+	/*
 	 * First collect all the processes that have the page
 	 * mapped in dirty form. This has to be done before try_to_unmap,
 	 * because ttu takes the rmap data structures down.
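The new block selects the page that collect_procs() and try_to_unmap() will operate on: the head page for hugetlbfs (and for a THP that cannot be split), the real 4k page once an anonymous THP has been split. A minimal userspace model of that selection; the struct and predicates are stand-ins, not kernel API:

#include <stdbool.h>
#include <stddef.h>

struct page {
	struct page *head;		/* NULL if this is the head itself */
	bool hugetlb, thp, anon;
};

static struct page *compound_head(struct page *p)
{
	return p->head ? p->head : p;
}

/* Stub: returns false when the split succeeded (the kernel returns 0). */
static bool split_huge_page_fails(struct page *hpage)
{
	(void)hpage;
	return false;
}

static struct page *pick_poisoned_page(struct page *p)
{
	struct page *hpage = compound_head(p);
	struct page *ppage = hpage;	/* hugetlb/THP default: the head page */

	if (hpage->thp && !hpage->hugetlb && hpage->anon &&
	    !split_huge_page_fails(hpage))
		ppage = p;		/* THP split: the real 4K poisoned page */
	return ppage;
}

int main(void)
{
	struct page head = { NULL, false, true, true };	/* anon THP head */
	struct page tail = { &head, false, true, true };

	return pick_poisoned_page(&tail) == &tail ? 0 : 1;	/* split OK -> p */
}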
@@ -902,12 +942,18 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * there's nothing that can be done.
 	 */
 	if (kill)
-		collect_procs(hpage, &tokill);
+		collect_procs(ppage, &tokill);
 
-	ret = try_to_unmap(hpage, ttu);
+	if (hpage != ppage)
+		lock_page_nosync(ppage);
+
+	ret = try_to_unmap(ppage, ttu);
 	if (ret != SWAP_SUCCESS)
 		printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
-				pfn, page_mapcount(hpage));
+				pfn, page_mapcount(ppage));
+
+	if (hpage != ppage)
+		unlock_page(ppage);
 
 	/*
 	 * Now that the dirty bit has been propagated to the
@@ -918,7 +964,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * use a more force-full uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	kill_procs_ao(&tokill, !!PageDirty(hpage), trapno,
+	kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
 			ret != SWAP_SUCCESS, p, pfn);
 
 	return ret;
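The dirty check and the unmap result feed kill_procs_ao() as its "doit" and "fail" arguments: a clean page can be re-read from disk, so nobody needs to die for it, while a page that is still mapped must never be touched again. A simplified restatement of that decision; illustrative, not the kernel function, and the kill_mode names are invented:

#include <stdbool.h>
#include <stdio.h>

enum kill_mode { KILL_NONE, KILL_CATCHABLE, KILL_FORCED };

static enum kill_mode choose_kill(bool page_dirty, bool unmap_failed)
{
	if (!page_dirty)
		return KILL_NONE;	/* clean: data recoverable from disk */
	if (unmap_failed)
		return KILL_FORCED;	/* still mapped: SIGKILL, uncatchable */
	return KILL_CATCHABLE;		/* dirty but unmapped: catchable SIGBUS */
}

int main(void)
{
	printf("%d %d %d\n", choose_kill(false, false),
	       choose_kill(true, false), choose_kill(true, true));
	return 0;
}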
@@ -927,7 +973,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 static void set_page_hwpoison_huge_page(struct page *hpage)
 {
 	int i;
-	int nr_pages = 1 << compound_order(hpage);
+	int nr_pages = 1 << compound_trans_order(hpage);
 	for (i = 0; i < nr_pages; i++)
 		SetPageHWPoison(hpage + i);
 }
@@ -935,7 +981,7 @@ static void set_page_hwpoison_huge_page(struct page *hpage)
 static void clear_page_hwpoison_huge_page(struct page *hpage)
 {
 	int i;
-	int nr_pages = 1 << compound_order(hpage);
+	int nr_pages = 1 << compound_trans_order(hpage);
 	for (i = 0; i < nr_pages; i++)
 		ClearPageHWPoison(hpage + i);
 }
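Both helpers sweep every base page of the compound page. compound_trans_order() replaces compound_order() because a THP's order can change while it is being split; the _trans variant is safe to call in that window. A userspace sketch of the sweep itself, with an illustrative flag name and layout:

#include <stdio.h>

#define PG_HWPOISON 0x1u

static void set_hwpoison_range(unsigned int *flags, unsigned int order)
{
	unsigned long nr_pages = 1UL << order;	/* order 9 -> 512 x 4K = 2M */

	for (unsigned long i = 0; i < nr_pages; i++)
		flags[i] |= PG_HWPOISON;
}

int main(void)
{
	unsigned int flags[512] = { 0 };

	set_hwpoison_range(flags, 9);		/* mark a whole 2M hugepage */
	printf("first %#x last %#x\n", flags[0], flags[511]);
	return 0;
}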
@@ -965,7 +1011,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 		return 0;
 	}
 
-	nr_pages = 1 << compound_order(hpage);
+	nr_pages = 1 << compound_trans_order(hpage);
 	atomic_long_add(nr_pages, &mce_bad_pages);
 
 	/*
@@ -1019,19 +1065,22 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * The check (unnecessarily) ignores LRU pages being isolated and
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
-	if (!PageLRU(p) && !PageHuge(p))
-		shake_page(p, 0);
-	if (!PageLRU(p) && !PageHuge(p)) {
-		/*
-		 * shake_page could have turned it free.
-		 */
-		if (is_free_buddy_page(p)) {
-			action_result(pfn, "free buddy, 2nd try", DELAYED);
-			return 0;
-		}
-		action_result(pfn, "non LRU", IGNORED);
-		put_page(p);
-		return -EBUSY;
+	if (!PageHuge(p) && !PageTransCompound(p)) {
+		if (!PageLRU(p))
+			shake_page(p, 0);
+		if (!PageLRU(p)) {
+			/*
+			 * shake_page could have turned it free.
+			 */
+			if (is_free_buddy_page(p)) {
+				action_result(pfn, "free buddy, 2nd try",
+						DELAYED);
+				return 0;
+			}
+			action_result(pfn, "non LRU", IGNORED);
+			put_page(p);
+			return -EBUSY;
+		}
 	}
 
 	/*
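Previously hugepages could be shaken too; now huge and transparent-compound pages skip shake_page() entirely, and the LRU test is repeated only for base pages. A restatement of the resulting branch structure; the bool parameters stand in for the kernel's Page*() tests, and the names are invented:

#include <stdbool.h>

enum mf_next { MF_PROCEED, MF_DELAYED_FREE, MF_GIVE_UP };

static enum mf_next classify_after_shake(bool huge_or_transcompound,
					 bool lru_after_shake, bool free_buddy)
{
	if (huge_or_transcompound)
		return MF_PROCEED;	/* never shaken; huge/THP paths handle it */
	if (lru_after_shake)
		return MF_PROCEED;	/* on the LRU: normal unmapping follows */
	if (free_buddy)
		return MF_DELAYED_FREE;	/* shake_page() freed it: done for now */
	return MF_GIVE_UP;		/* non-LRU and not free: -EBUSY */
}

int main(void)
{
	return classify_after_shake(false, true, false) == MF_PROCEED ? 0 : 1;
}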
@@ -1061,7 +1110,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * For error on the tail page, we should set PG_hwpoison
 	 * on the head page to show that the hugepage is hwpoisoned
 	 */
-	if (PageTail(p) && TestSetPageHWPoison(hpage)) {
+	if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
 		action_result(pfn, "hugepage already hardware poisoned",
 				IGNORED);
 		unlock_page(hpage);
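With THPs now split earlier, the extra PageHuge() guard restricts this path to hugetlbfs tail pages. TestSetPageHWPoison() is an atomic test-and-set of the poison flag on the head page, which is what makes the "already poisoned" check race-free. A C11 analogue; illustrative, not the kernel's bitops:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PG_HWPOISON 0x1u

static bool test_and_set_hwpoison(atomic_uint *flags)
{
	unsigned int old = atomic_fetch_or(flags, PG_HWPOISON);
	return old & PG_HWPOISON;	/* true -> already poisoned, bail out */
}

int main(void)
{
	atomic_uint flags = 0;

	printf("%d\n", test_and_set_hwpoison(&flags));	/* 0: newly set */
	printf("%d\n", test_and_set_hwpoison(&flags));	/* 1: already set */
	return 0;
}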
@@ -1163,7 +1212,7 @@ int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	nr_pages = 1 << compound_order(page);
+	nr_pages = 1 << compound_trans_order(page);
 
 	if (!get_page_unless_zero(page)) {
 		/*
@@ -1230,11 +1279,10 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 		return 1;
 
 	/*
-	 * The lock_system_sleep prevents a race with memory hotplug,
-	 * because the isolation assumes there's only a single user.
+	 * The lock_memory_hotplug prevents a race with memory hotplug.
 	 * This is a big hammer, a better would be nicer.
 	 */
-	lock_system_sleep();
+	lock_memory_hotplug();
 
 	/*
 	 * Isolate the page, so that it doesn't get reallocated if it
@@ -1264,7 +1312,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 		ret = 1;
 	}
 	unset_migratetype_isolate(p);
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
 
@@ -1290,9 +1338,13 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	/* Keep page count to indicate a given hugepage is isolated. */
 
 	list_add(&hpage->lru, &pagelist);
-	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0,
+				true);
 	if (ret) {
-		putback_lru_pages(&pagelist);
+		struct page *page1, *page2;
+		list_for_each_entry_safe(page1, page2, &pagelist, lru)
+			put_page(page1);
+
 		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
 			pfn, ret, page->flags);
 		if (ret > 0)
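The failure path stops using putback_lru_pages(): hugepages never sit on the LRU lists, so each one is released with put_page() instead. list_for_each_entry_safe() is required because put_page() can free the entry and unlink it mid-walk, so the iterator must read the next node before dropping the current one. A self-contained analogue of the safe-iteration idiom, on a toy list with illustrative names:

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int id; };

static void drop_all(struct node *head)
{
	struct node *cur, *next;

	/* the "safe" idiom: fetch 'next' before freeing 'cur' */
	for (cur = head; cur; cur = next) {
		next = cur->next;
		printf("dropping %d\n", cur->id);
		free(cur);	/* analogous to put_page() releasing the page */
	}
}

int main(void)
{
	struct node *b = malloc(sizeof(*b));
	struct node *a = malloc(sizeof(*a));

	b->next = NULL; b->id = 2;
	a->next = b; a->id = 1;
	drop_all(a);
	return 0;
}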
@@ -1301,7 +1353,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	}
 done:
 	if (!PageHWPoison(hpage))
-		atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
+		atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages);
 	set_page_hwpoison_huge_page(hpage);
 	dequeue_hwpoisoned_huge_page(hpage);
 	/* keep elevated page count for bad page */
@@ -1413,8 +1465,10 @@ int soft_offline_page(struct page *page, int flags)
 	LIST_HEAD(pagelist);
 
 	list_add(&page->lru, &pagelist);
-	ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+	ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+							0, true);
 	if (ret) {
+		putback_lru_pages(&pagelist);
 		pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
 			pfn, ret, page->flags);
 		if (ret > 0)
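migrate_pages() grows the same trailing offlining argument (true here), and a failed migration now puts the isolated pages back on the LRU before reporting the error. The whole soft-offline path is reachable from userspace; a minimal harness, assuming a kernel built with CONFIG_MEMORY_FAILURE that supports MADV_SOFT_OFFLINE, run with CAP_SYS_ADMIN:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_SOFT_OFFLINE
#define MADV_SOFT_OFFLINE 101	/* from the kernel ABI headers */
#endif

int main(void)
{
	size_t len = 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	memset(p, 0xaa, len);	/* make sure the page is populated */

	/* ask the kernel to migrate the data away and retire the page */
	if (madvise(p, len, MADV_SOFT_OFFLINE))
		perror("madvise(MADV_SOFT_OFFLINE)");
	else
		printf("page soft-offlined, contents preserved: %#x\n",
		       (unsigned char)p[0]);
	return 0;
}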