diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 102 |
1 files changed, 93 insertions, 9 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2044fe8920c2..44a8cefeae6e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -697,11 +697,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) | |||
697 | * Issues: | 697 | * Issues: |
698 | * - Error on hugepage is contained in hugepage unit (not in raw page unit.) | 698 | * - Error on hugepage is contained in hugepage unit (not in raw page unit.) |
699 | * To narrow down kill region to one page, we need to break up pmd. | 699 | * To narrow down kill region to one page, we need to break up pmd. |
700 | * - To support soft-offlining for hugepage, we need to support hugepage | ||
701 | * migration. | ||
702 | */ | 700 | */ |
703 | static int me_huge_page(struct page *p, unsigned long pfn) | 701 | static int me_huge_page(struct page *p, unsigned long pfn) |
704 | { | 702 | { |
703 | int res = 0; | ||
705 | struct page *hpage = compound_head(p); | 704 | struct page *hpage = compound_head(p); |
706 | /* | 705 | /* |
707 | * We can safely recover from error on free or reserved (i.e. | 706 | * We can safely recover from error on free or reserved (i.e. |
@@ -714,8 +713,9 @@ static int me_huge_page(struct page *p, unsigned long pfn) | |||
714 | * so there is no race between isolation and mapping/unmapping. | 713 | * so there is no race between isolation and mapping/unmapping. |
715 | */ | 714 | */ |
716 | if (!(page_mapping(hpage) || PageAnon(hpage))) { | 715 | if (!(page_mapping(hpage) || PageAnon(hpage))) { |
717 | __isolate_hwpoisoned_huge_page(hpage); | 716 | res = dequeue_hwpoisoned_huge_page(hpage); |
718 | return RECOVERED; | 717 | if (!res) |
718 | return RECOVERED; | ||
719 | } | 719 | } |
720 | return DELAYED; | 720 | return DELAYED; |
721 | } | 721 | } |
@@ -972,7 +972,10 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
972 | * We need/can do nothing about count=0 pages. | 972 | * We need/can do nothing about count=0 pages. |
973 | * 1) it's a free page, and therefore in safe hand: | 973 | * 1) it's a free page, and therefore in safe hand: |
974 | * prep_new_page() will be the gate keeper. | 974 | * prep_new_page() will be the gate keeper. |
975 | * 2) it's part of a non-compound high order page. | 975 | * 2) it's a free hugepage, which is also safe: |
976 | * an affected hugepage will be dequeued from the hugepage freelist, | ||
977 | * so there's no concern about it ever being reused afterwards. | ||
978 | * 3) it's part of a non-compound high order page. | ||
976 | * Implies some kernel user: cannot stop them from | 979 | * Implies some kernel user: cannot stop them from |
977 | * R/W the page; let's pray that the page has been | 980 | * R/W the page; let's pray that the page has been |
978 | * used and will be freed some time later. | 981 | * used and will be freed some time later. |
@@ -984,6 +987,24 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
984 | if (is_free_buddy_page(p)) { | 987 | if (is_free_buddy_page(p)) { |
985 | action_result(pfn, "free buddy", DELAYED); | 988 | action_result(pfn, "free buddy", DELAYED); |
986 | return 0; | 989 | return 0; |
990 | } else if (PageHuge(hpage)) { | ||
991 | /* | ||
992 | * Check "just unpoisoned", "filter hit", and | ||
993 | * "race with other subpage." | ||
994 | */ | ||
995 | lock_page_nosync(hpage); | ||
996 | if (!PageHWPoison(hpage) | ||
997 | || (hwpoison_filter(p) && TestClearPageHWPoison(p)) | ||
998 | || (p != hpage && TestSetPageHWPoison(hpage))) { | ||
999 | atomic_long_sub(nr_pages, &mce_bad_pages); | ||
1000 | return 0; | ||
1001 | } | ||
1002 | set_page_hwpoison_huge_page(hpage); | ||
1003 | res = dequeue_hwpoisoned_huge_page(hpage); | ||
1004 | action_result(pfn, "free huge", | ||
1005 | res ? IGNORED : DELAYED); | ||
1006 | unlock_page(hpage); | ||
1007 | return res; | ||
987 | } else { | 1008 | } else { |
988 | action_result(pfn, "high order kernel", IGNORED); | 1009 | action_result(pfn, "high order kernel", IGNORED); |
989 | return -EBUSY; | 1010 | return -EBUSY; |
@@ -1145,6 +1166,16 @@ int unpoison_memory(unsigned long pfn) | |||
1145 | nr_pages = 1 << compound_order(page); | 1166 | nr_pages = 1 << compound_order(page); |
1146 | 1167 | ||
1147 | if (!get_page_unless_zero(page)) { | 1168 | if (!get_page_unless_zero(page)) { |
1169 | /* | ||
1170 | * A HWPoisoned hugepage should have a non-zero refcount, so a | ||
1171 | * race between memory failure and unpoison appears to be in progress. | ||
1172 | * In that case unpoison fails and memory failure runs | ||
1173 | * to the end. | ||
1174 | */ | ||
1175 | if (PageHuge(page)) { | ||
1176 | pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn); | ||
1177 | return 0; | ||
1178 | } | ||
1148 | if (TestClearPageHWPoison(p)) | 1179 | if (TestClearPageHWPoison(p)) |
1149 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1180 | atomic_long_sub(nr_pages, &mce_bad_pages); |
1150 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); | 1181 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); |
@@ -1162,9 +1193,9 @@ int unpoison_memory(unsigned long pfn) | |||
1162 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); | 1193 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); |
1163 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1194 | atomic_long_sub(nr_pages, &mce_bad_pages); |
1164 | freeit = 1; | 1195 | freeit = 1; |
1196 | if (PageHuge(page)) | ||
1197 | clear_page_hwpoison_huge_page(page); | ||
1165 | } | 1198 | } |
1166 | if (PageHuge(p)) | ||
1167 | clear_page_hwpoison_huge_page(page); | ||
1168 | unlock_page(page); | 1199 | unlock_page(page); |
1169 | 1200 | ||
1170 | put_page(page); | 1201 | put_page(page); |
@@ -1178,7 +1209,11 @@ EXPORT_SYMBOL(unpoison_memory); | |||
1178 | static struct page *new_page(struct page *p, unsigned long private, int **x) | 1209 | static struct page *new_page(struct page *p, unsigned long private, int **x) |
1179 | { | 1210 | { |
1180 | int nid = page_to_nid(p); | 1211 | int nid = page_to_nid(p); |
1181 | return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); | 1212 | if (PageHuge(p)) |
1213 | return alloc_huge_page_node(page_hstate(compound_head(p)), | ||
1214 | nid); | ||
1215 | else | ||
1216 | return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); | ||
1182 | } | 1217 | } |
1183 | 1218 | ||
1184 | /* | 1219 | /* |
@@ -1206,8 +1241,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1206 | * was free. | 1241 | * was free. |
1207 | */ | 1242 | */ |
1208 | set_migratetype_isolate(p); | 1243 | set_migratetype_isolate(p); |
1244 | /* | ||
1245 | * When the target page is a free hugepage, just remove it | ||
1246 | * from the free hugepage list. | ||
1247 | */ | ||
1209 | if (!get_page_unless_zero(compound_head(p))) { | 1248 | if (!get_page_unless_zero(compound_head(p))) { |
1210 | if (is_free_buddy_page(p)) { | 1249 | if (PageHuge(p)) { |
1250 | pr_info("get_any_page: %#lx free huge page\n", pfn); | ||
1251 | ret = dequeue_hwpoisoned_huge_page(compound_head(p)); | ||
1252 | } else if (is_free_buddy_page(p)) { | ||
1211 | pr_info("get_any_page: %#lx free buddy page\n", pfn); | 1253 | pr_info("get_any_page: %#lx free buddy page\n", pfn); |
1212 | /* Set hwpoison bit while page is still isolated */ | 1254 | /* Set hwpoison bit while page is still isolated */ |
1213 | SetPageHWPoison(p); | 1255 | SetPageHWPoison(p); |
@@ -1226,6 +1268,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1226 | return ret; | 1268 | return ret; |
1227 | } | 1269 | } |
1228 | 1270 | ||
1271 | static int soft_offline_huge_page(struct page *page, int flags) | ||
1272 | { | ||
1273 | int ret; | ||
1274 | unsigned long pfn = page_to_pfn(page); | ||
1275 | struct page *hpage = compound_head(page); | ||
1276 | LIST_HEAD(pagelist); | ||
1277 | |||
1278 | ret = get_any_page(page, pfn, flags); | ||
1279 | if (ret < 0) | ||
1280 | return ret; | ||
1281 | if (ret == 0) | ||
1282 | goto done; | ||
1283 | |||
1284 | if (PageHWPoison(hpage)) { | ||
1285 | put_page(hpage); | ||
1286 | pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn); | ||
1287 | return -EBUSY; | ||
1288 | } | ||
1289 | |||
1290 | /* Keep page count to indicate a given hugepage is isolated. */ | ||
1291 | |||
1292 | list_add(&hpage->lru, &pagelist); | ||
1293 | ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); | ||
1294 | if (ret) { | ||
1295 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", | ||
1296 | pfn, ret, page->flags); | ||
1297 | if (ret > 0) | ||
1298 | ret = -EIO; | ||
1299 | return ret; | ||
1300 | } | ||
1301 | done: | ||
1302 | if (!PageHWPoison(hpage)) | ||
1303 | atomic_long_add(1 << compound_order(hpage), &mce_bad_pages); | ||
1304 | set_page_hwpoison_huge_page(hpage); | ||
1305 | dequeue_hwpoisoned_huge_page(hpage); | ||
1306 | /* keep elevated page count for bad page */ | ||
1307 | return ret; | ||
1308 | } | ||
1309 | |||
1229 | /** | 1310 | /** |
1230 | * soft_offline_page - Soft offline a page. | 1311 | * soft_offline_page - Soft offline a page. |
1231 | * @page: page to offline | 1312 | * @page: page to offline |
@@ -1253,6 +1334,9 @@ int soft_offline_page(struct page *page, int flags) | |||
1253 | int ret; | 1334 | int ret; |
1254 | unsigned long pfn = page_to_pfn(page); | 1335 | unsigned long pfn = page_to_pfn(page); |
1255 | 1336 | ||
1337 | if (PageHuge(page)) | ||
1338 | return soft_offline_huge_page(page, flags); | ||
1339 | |||
1256 | ret = get_any_page(page, pfn, flags); | 1340 | ret = get_any_page(page, pfn, flags); |
1257 | if (ret < 0) | 1341 | if (ret < 0) |
1258 | return ret; | 1342 | return ret; |