Diffstat (limited to 'mm')
-rw-r--r--  mm/hwpoison-inject.c |   2
-rw-r--r--  mm/memory-failure.c  | 194
2 files changed, 190 insertions, 6 deletions
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index c597f46ac18a..a77fe3f9e211 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -29,7 +29,7 @@ static int hwpoison_inject(void *data, u64 val)
 		return 0;
 
 	if (!PageLRU(p))
-		shake_page(p);
+		shake_page(p, 0);
 	/*
 	 * This implies unable to support non-LRU pages.
 	 */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b5c3b6bd511f..bcce28755832 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -41,6 +41,9 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/backing-dev.h>
+#include <linux/migrate.h>
+#include <linux/page-isolation.h>
+#include <linux/suspend.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -201,7 +204,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
  * When a unknown page type is encountered drain as many buffers as possible
  * in the hope to turn the page into a LRU or free page, which we can handle.
  */
-void shake_page(struct page *p)
+void shake_page(struct page *p, int access)
 {
 	if (!PageSlab(p)) {
 		lru_add_drain_all();
@@ -211,11 +214,19 @@ void shake_page(struct page *p)
 		if (PageLRU(p) || is_free_buddy_page(p))
 			return;
 	}
+
 	/*
-	 * Could call shrink_slab here (which would also
-	 * shrink other caches). Unfortunately that might
-	 * also access the corrupted page, which could be fatal.
+	 * Only call shrink_slab here (which would also
+	 * shrink other caches) if access is not potentially fatal.
 	 */
+	if (access) {
+		int nr;
+		do {
+			nr = shrink_slab(1000, GFP_KERNEL, 1000);
+			if (page_count(p) == 0)
+				break;
+		} while (nr > 10);
+	}
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
@@ -949,7 +960,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
 	if (!PageLRU(p))
-		shake_page(p);
+		shake_page(p, 0);
 	if (!PageLRU(p)) {
 		/*
 		 * shake_page could have turned it free.
@@ -1099,3 +1110,176 @@ int unpoison_memory(unsigned long pfn)
 	return 0;
 }
 EXPORT_SYMBOL(unpoison_memory);
+
+static struct page *new_page(struct page *p, unsigned long private, int **x)
+{
+	return alloc_pages(GFP_HIGHUSER_MOVABLE, 0);
+}
+
+/*
+ * Safely get reference count of an arbitrary page.
+ * Returns 0 for a free page, -EIO for a zero refcount page
+ * that is not free, and 1 for any other page type.
+ * For 1 the page is returned with increased page count, otherwise not.
+ */
+static int get_any_page(struct page *p, unsigned long pfn, int flags)
+{
+	int ret;
+
+	if (flags & MF_COUNT_INCREASED)
+		return 1;
+
+	/*
+	 * The lock_system_sleep prevents a race with memory hotplug,
+	 * because the isolation assumes there's only a single user.
+	 * This is a big hammer, a better way would be nicer.
+	 */
+	lock_system_sleep();
+
+	/*
+	 * Isolate the page, so that it doesn't get reallocated if it
+	 * was free.
+	 */
+	set_migratetype_isolate(p);
+	if (!get_page_unless_zero(compound_head(p))) {
+		if (is_free_buddy_page(p)) {
+			pr_debug("get_any_page: %#lx free buddy page\n", pfn);
+			/* Set hwpoison bit while page is still isolated */
+			SetPageHWPoison(p);
+			ret = 0;
+		} else {
+			pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n",
+				pfn, p->flags);
+			ret = -EIO;
+		}
+	} else {
+		/* Not a free page */
+		ret = 1;
+	}
+	unset_migratetype_isolate(p);
+	unlock_system_sleep();
+	return ret;
+}
+
+/**
+ * soft_offline_page - Soft offline a page.
+ * @page: page to offline
+ * @flags: flags. Same as memory_failure().
+ *
+ * Returns 0 on success, otherwise negated errno.
+ *
+ * Soft offline a page, by migration or invalidation,
+ * without killing anything. This is for the case when
+ * a page is not corrupted yet (so it's still valid to access),
+ * but has had a number of corrected errors and is better taken
+ * out.
+ *
+ * The actual policy on when to do that is maintained by
+ * user space.
+ *
+ * This should never impact any application or cause data loss,
+ * however it might take some time.
+ *
+ * This is not a 100% solution for all memory, but tries to be
+ * ``good enough'' for the majority of memory.
+ */
+int soft_offline_page(struct page *page, int flags)
+{
+	int ret;
+	unsigned long pfn = page_to_pfn(page);
+
+	ret = get_any_page(page, pfn, flags);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		goto done;
+
+	/*
+	 * Page cache page we can handle?
+	 */
+	if (!PageLRU(page)) {
+		/*
+		 * Try to free it.
+		 */
+		put_page(page);
+		shake_page(page, 1);
+
+		/*
+		 * Did it turn free?
+		 */
+		ret = get_any_page(page, pfn, 0);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			goto done;
+	}
+	if (!PageLRU(page)) {
+		pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n",
+			pfn, page->flags);
+		return -EIO;
+	}
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+
+	/*
+	 * Synchronized using the page lock with memory_failure()
+	 */
+	if (PageHWPoison(page)) {
+		unlock_page(page);
+		put_page(page);
+		pr_debug("soft offline: %#lx page already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
+	/*
+	 * Try to invalidate first. This should work for
+	 * non dirty unmapped page cache pages.
+	 */
+	ret = invalidate_inode_page(page);
+	unlock_page(page);
+
+	/*
+	 * Drop count because page migration doesn't like raised
+	 * counts. The page could get re-allocated, but if it becomes
+	 * LRU the isolation will just fail.
+	 * RED-PEN would be better to keep it isolated here, but we
+	 * would need to fix isolation locking first.
+	 */
+	put_page(page);
+	if (ret == 1) {
+		ret = 0;
+		pr_debug("soft_offline: %#lx: invalidated\n", pfn);
+		goto done;
+	}
+
+	/*
+	 * Simple invalidation didn't work.
+	 * Try to migrate to a new page instead. migrate.c
+	 * handles a large number of cases for us.
+	 */
+	ret = isolate_lru_page(page);
+	if (!ret) {
+		LIST_HEAD(pagelist);
+
+		list_add(&page->lru, &pagelist);
+		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+		if (ret) {
+			pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
+				pfn, ret, page->flags);
+			if (ret > 0)
+				ret = -EIO;
+		}
+	} else {
+		pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
+			pfn, ret, page_count(page), page->flags);
+	}
+	if (ret)
+		return ret;
+
+done:
+	atomic_long_add(1, &mce_bad_pages);
+	SetPageHWPoison(page);
+	/* keep elevated page count for bad page */
+	return ret;
+}
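
Usage note (illustration only, not part of the patch above): the sketch below shows how the soft_offline_page() entry point added here might be driven from other kernel code once something has decided that a page has seen too many corrected errors. Since the patch deliberately leaves the policy to user space, the caller, the per-pfn corrected-error count and the CE_THRESHOLD value are purely hypothetical; only pfn_valid(), pfn_to_page() and soft_offline_page() (declared for external callers outside this mm/-only diff, presumably in include/linux/mm.h) are existing interfaces.

/* Hypothetical caller, for illustration only -- not part of this patch. */
#include <linux/mm.h>

#define CE_THRESHOLD	16	/* made-up corrected-error threshold */

/*
 * Soft offline the page behind @pfn once its corrected-error count
 * (tracked elsewhere by this hypothetical caller) reaches the threshold.
 * No reference is held on the page here, so flags is 0 and get_any_page()
 * takes its own reference via get_page_unless_zero().
 */
static int maybe_soft_offline(unsigned long pfn, unsigned long ce_count)
{
	if (!pfn_valid(pfn) || ce_count < CE_THRESHOLD)
		return 0;

	return soft_offline_page(pfn_to_page(pfn), 0);
}

A caller that already holds a reference on the page would pass MF_COUNT_INCREASED instead of 0, so that get_any_page() returns immediately without taking another reference; that reference is then consumed by the put_page() calls inside soft_offline_page().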