Diffstat (limited to 'mm/memory-failure.c')

 mm/memory-failure.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 93 insertions(+), 9 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2044fe8920c2..44a8cefeae6e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -697,11 +697,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
  * Issues:
  * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
  *   To narrow down kill region to one page, we need to break up pmd.
- * - To support soft-offlining for hugepage, we need to support hugepage
- *   migration.
  */
 static int me_huge_page(struct page *p, unsigned long pfn)
 {
+	int res = 0;
 	struct page *hpage = compound_head(p);
 	/*
 	 * We can safely recover from error on free or reserved (i.e.
@@ -714,8 +713,9 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 	 * so there is no race between isolation and mapping/unmapping.
 	 */
 	if (!(page_mapping(hpage) || PageAnon(hpage))) {
-		__isolate_hwpoisoned_huge_page(hpage);
-		return RECOVERED;
+		res = dequeue_hwpoisoned_huge_page(hpage);
+		if (!res)
+			return RECOVERED;
 	}
 	return DELAYED;
 }
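me_huge_page() now delegates to dequeue_hwpoisoned_huge_page(), which lives on the hugetlb side and is not shown in this diff. A minimal sketch of the idea follows; the is_hugepage_on_freelist() helper and the free_huge_pages bookkeeping are assumptions about hugetlb internals, not lines taken from this patch. The point is that a poisoned hugepage still sitting on a freelist gets unlinked so the allocator can never hand it out again, while a hugepage that is in use reports -EBUSY and leaves recovery DELAYED:

/*
 * Sketch: permanently retire a hwpoisoned hugepage that is on a
 * hugetlb freelist. Returns 0 if it was dequeued, -EBUSY if the
 * page was not free (i.e. someone is still using it).
 */
int dequeue_hwpoisoned_huge_page(struct page *hpage)
{
	struct hstate *h = page_hstate(hpage);
	int nid = page_to_nid(hpage);
	int ret = -EBUSY;

	spin_lock(&hugetlb_lock);
	if (is_hugepage_on_freelist(hpage)) {	/* assumed helper */
		list_del(&hpage->lru);		/* unlink from freelist */
		h->free_huge_pages--;		/* fix global count */
		h->free_huge_pages_node[nid]--;	/* and per-node count */
		ret = 0;
	}
	spin_unlock(&hugetlb_lock);
	return ret;
}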
@@ -972,7 +972,10 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * We need/can do nothing about count=0 pages.
 	 * 1) it's a free page, and therefore in safe hand:
 	 *    prep_new_page() will be the gate keeper.
-	 * 2) it's part of a non-compound high order page.
+	 * 2) it's a free hugepage, which is also safe:
+	 *    an affected hugepage will be dequeued from hugepage freelist,
+	 *    so there's no concern about reusing it ever after.
+	 * 3) it's part of a non-compound high order page.
 	 *    Implies some kernel user: cannot stop them from
 	 *    R/W the page; let's pray that the page has been
 	 *    used and will be freed some time later.
@@ -984,6 +987,24 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 		if (is_free_buddy_page(p)) {
 			action_result(pfn, "free buddy", DELAYED);
 			return 0;
+		} else if (PageHuge(hpage)) {
+			/*
+			 * Check "just unpoisoned", "filter hit", and
+			 * "race with other subpage."
+			 */
+			lock_page_nosync(hpage);
+			if (!PageHWPoison(hpage)
+			    || (hwpoison_filter(p) && TestClearPageHWPoison(p))
+			    || (p != hpage && TestSetPageHWPoison(hpage))) {
+				atomic_long_sub(nr_pages, &mce_bad_pages);
+				return 0;
+			}
+			set_page_hwpoison_huge_page(hpage);
+			res = dequeue_hwpoisoned_huge_page(hpage);
+			action_result(pfn, "free huge",
+				      res ? IGNORED : DELAYED);
+			unlock_page(hpage);
+			return res;
 		} else {
 			action_result(pfn, "high order kernel", IGNORED);
 			return -EBUSY;
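set_page_hwpoison_huge_page() is also defined outside this hunk. Since errors are contained at hugepage granularity (per the me_huge_page() comment earlier), a plausible minimal body simply tags every raw page of the compound page so that any later per-page PageHWPoison() check sees the poison. Treat this as a sketch of the intent, not the patch's literal implementation:

/*
 * Sketch: propagate the hwpoison flag from the head page to every
 * raw page in the hugepage, since the error is handled in hugepage
 * units rather than raw-page units.
 */
static void set_page_hwpoison_huge_page(struct page *hpage)
{
	int i;
	int nr_pages = 1 << compound_order(hpage);

	for (i = 0; i < nr_pages; i++)
		SetPageHWPoison(hpage + i);
}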
@@ -1145,6 +1166,16 @@ int unpoison_memory(unsigned long pfn)
 	nr_pages = 1 << compound_order(page);
 
 	if (!get_page_unless_zero(page)) {
+		/*
+		 * A HWPoisoned hugepage should have a non-zero refcount,
+		 * so getting here means memory failure raced with this
+		 * unpoison; the unpoison fails and memory failure runs
+		 * to the end.
+		 */
+		if (PageHuge(page)) {
+			pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
+			return 0;
+		}
 		if (TestClearPageHWPoison(p))
 			atomic_long_sub(nr_pages, &mce_bad_pages);
 		pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
@@ -1162,9 +1193,9 @@
 		pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
 		atomic_long_sub(nr_pages, &mce_bad_pages);
 		freeit = 1;
+		if (PageHuge(page))
+			clear_page_hwpoison_huge_page(page);
 	}
-	if (PageHuge(p))
-		clear_page_hwpoison_huge_page(page);
 	unlock_page(page);
 
 	put_page(page);
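Moving the clear_page_hwpoison_huge_page() call inside the braces matters: it must run only when TestClearPageHWPoison(page) succeeded, i.e. for the one thread that actually performed the unpoison, rather than unconditionally on every call. The helper itself is plausibly the mirror image of the set-side sketch above; again an assumed body, not quoted from the patch:

/* Sketch: undo set_page_hwpoison_huge_page() on a software unpoison. */
static void clear_page_hwpoison_huge_page(struct page *hpage)
{
	int i;
	int nr_pages = 1 << compound_order(hpage);

	for (i = 0; i < nr_pages; i++)
		ClearPageHWPoison(hpage + i);
}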
@@ -1178,7 +1209,11 @@ EXPORT_SYMBOL(unpoison_memory);
 static struct page *new_page(struct page *p, unsigned long private, int **x)
 {
 	int nid = page_to_nid(p);
-	return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
+	if (PageHuge(p))
+		return alloc_huge_page_node(page_hstate(compound_head(p)),
+					    nid);
+	else
+		return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
 }
 
 /*
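new_page() is the migration-target callback, so for a hugepage source it has to produce a hugepage, preferably on the same node. A sketch of what alloc_huge_page_node() plausibly does on the hugetlb side; dequeue_huge_page_node() and alloc_buddy_huge_page() are assumptions about hugetlb internals that do not appear in this diff:

/*
 * Sketch: prefer a pre-reserved free hugepage on the requested node,
 * falling back to allocating a fresh one from the buddy allocator.
 */
struct page *alloc_huge_page_node(struct hstate *h, int nid)
{
	struct page *page;

	spin_lock(&hugetlb_lock);
	page = dequeue_huge_page_node(h, nid);	/* from node freelist */
	spin_unlock(&hugetlb_lock);

	if (!page)				/* freelist empty */
		page = alloc_buddy_huge_page(h, nid);

	return page;
}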
@@ -1206,8 +1241,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	 * was free.
 	 */
 	set_migratetype_isolate(p);
+	/*
+	 * When the target page is a free hugepage, just remove it
+	 * from the free hugepage list.
+	 */
 	if (!get_page_unless_zero(compound_head(p))) {
-		if (is_free_buddy_page(p)) {
+		if (PageHuge(p)) {
+			pr_info("get_any_page: %#lx free huge page\n", pfn);
+			ret = dequeue_hwpoisoned_huge_page(compound_head(p));
+		} else if (is_free_buddy_page(p)) {
 			pr_info("get_any_page: %#lx free buddy page\n", pfn);
 			/* Set hwpoison bit while page is still isolated */
 			SetPageHWPoison(p);
@@ -1226,6 +1268,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	return ret;
 }
 
+static int soft_offline_huge_page(struct page *page, int flags)
+{
+	int ret;
+	unsigned long pfn = page_to_pfn(page);
+	struct page *hpage = compound_head(page);
+	LIST_HEAD(pagelist);
+
+	ret = get_any_page(page, pfn, flags);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		goto done;
+
+	if (PageHWPoison(hpage)) {
+		put_page(hpage);
+		pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
+	/* Keep page count to indicate a given hugepage is isolated. */
+
+	list_add(&hpage->lru, &pagelist);
+	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+	if (ret) {
+		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
+			 pfn, ret, page->flags);
+		if (ret > 0)
+			ret = -EIO;
+		return ret;
+	}
+done:
+	if (!PageHWPoison(hpage))
+		atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
+	set_page_hwpoison_huge_page(hpage);
+	dequeue_hwpoisoned_huge_page(hpage);
+	/* keep elevated page count for bad page */
+	return ret;
+}
+
 /**
  * soft_offline_page - Soft offline a page.
  * @page: page to offline
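soft_offline_huge_page() hands the isolated hugepage to migrate_huge_pages(), a hugepage-aware counterpart of migrate_pages() from the related hugepage-migration series. A heavily simplified sketch of its contract follows; unmap_and_move_huge_page() and the exact retry policy are assumptions about that series, not content of this diff. The key property the caller relies on is the return convention: 0 on success, a positive count of pages that could not be moved otherwise, which is why soft_offline_huge_page() maps any positive return to -EIO:

/*
 * Sketch: walk the caller's private list, retrying transient
 * (-EAGAIN) failures a bounded number of times. Returns 0 when
 * everything migrated, else the number of pages left behind.
 */
int migrate_huge_pages(struct list_head *from, new_page_t get_new_page,
		       unsigned long private, int offlining)
{
	int retry = 1, nr_failed = 0, pass, rc;
	struct page *page, *page2;

	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;
		list_for_each_entry_safe(page, page2, from, lru) {
			cond_resched();
			rc = unmap_and_move_huge_page(get_new_page, private,
						      page, pass > 2, offlining);
			switch (rc) {
			case -EAGAIN:
				retry++;	/* transient; try next pass */
				break;
			case 0:
				break;		/* migrated */
			default:
				nr_failed++;	/* permanent failure */
				break;
			}
		}
	}
	return nr_failed + retry;
}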
@@ -1253,6 +1334,9 @@ int soft_offline_page(struct page *page, int flags)
 	int ret;
 	unsigned long pfn = page_to_pfn(page);
 
+	if (PageHuge(page))
+		return soft_offline_huge_page(page, flags);
+
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
 		return ret;
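With the PageHuge() dispatch in soft_offline_page() in place, hugepage soft offlining can be driven from user space the same way as for base pages. A hypothetical smoke test, assuming root, CONFIG_MEMORY_FAILURE=y, hugepages reserved via /proc/sys/vm/nr_hugepages, and a kernel carrying this patch; the MADV_SOFT_OFFLINE and MAP_HUGETLB fallback values below are the asm-generic/x86 ones:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_SOFT_OFFLINE
#define MADV_SOFT_OFFLINE 101	/* asm-generic/mman-common.h */
#endif
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* x86 value */
#endif

#define HPS (2UL * 1024 * 1024)	/* assume 2MB hugepages */

int main(void)
{
	char *p = mmap(NULL, HPS, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 1, HPS);			/* fault the hugepage in */
	/* Ask the kernel to migrate the backing hugepage away and
	 * retire it; unlike MADV_HWPOISON, nothing gets killed. */
	if (madvise(p, HPS, MADV_SOFT_OFFLINE))
		perror("madvise(MADV_SOFT_OFFLINE)");
	else
		printf("soft-offlined; data preserved: %d\n", p[0]);
	munmap(p, HPS);
	return 0;
}

On success the contents survive because the page was migrated before the old hugepage was poisoned and dequeued, which is exactly the behavior soft_offline_huge_page() implements above.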
