-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c |  26
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c          | 179
-rw-r--r--  drivers/base/memory.c                     |   2
-rw-r--r--  include/linux/mm.h                        |   4
-rw-r--r--  mm/hwpoison-inject.c                      |   4
-rw-r--r--  mm/madvise.c                              |   2
-rw-r--r--  mm/memory-failure.c                       |  96
7 files changed, 205 insertions(+), 108 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f4272d..0c82091b1652 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,7 +54,14 @@ static struct severity {
 #define MASK(x, y) .mask = x, .result = y
 #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
 #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
 #define MCACOD 0xffff
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB	0x00C0	/* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK	0xfff0
+#define MCACOD_L3WB	0x017A	/* L3 Explicit Writeback */
+#define MCACOD_DATA	0x0134	/* Data Load */
+#define MCACOD_INSTR	0x0150	/* Instruction Fetch */
 
 	MCESEV(
 		NO, "Invalid",
@@ -102,11 +109,24 @@ static struct severity {
 		SER, BITCLR(MCI_STATUS_S)
 		),
 
-	/* AR add known MCACODs here */
 	MCESEV(
 		PANIC, "Action required with lost events",
 		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
 		),
+
+	/* known AR MCACODs: */
+#ifdef	CONFIG_MEMORY_FAILURE
+	MCESEV(
+		KEEP, "HT thread notices Action required: data load error",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		MCGMASK(MCG_STATUS_EIPV, 0)
+		),
+	MCESEV(
+		AR, "Action required: data load error",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		USER
+		),
+#endif
 	MCESEV(
 		PANIC, "Action required: unknown MCACOD",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
@@ -115,11 +135,11 @@ static struct severity {
 	/* known AO MCACODs: */
 	MCESEV(
 		AO, "Action optional: memory scrubbing error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
 		),
 	MCESEV(
 		AO, "Action optional: last level cache writeback error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
 		),
 	MCESEV(
 		SOME, "Action optional: unknown MCACOD",
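Note: each MCESEV() entry compiles down to a mask/result pair, and the severity grader returns the first entry whose masked MCi_STATUS bits match — which is why the new AR entries must sit before the catch-all "Action required: unknown MCACOD" panic entry. A minimal sketch of that first-match rule (names simplified; the real table walk also qualifies matches on MCG status, SER capability and user/kernel context via MCGMASK/SER/USER):

struct sev_rule {
	u64 mask;	/* which MCi_STATUS bits to test */
	u64 result;	/* required value of the masked bits */
	int sev;	/* severity returned on a match */
};

static int lookup_severity(const struct sev_rule *tbl, int n, u64 status)
{
	int i;

	for (i = 0; i < n; i++)
		if ((status & tbl[i].mask) == tbl[i].result)
			return tbl[i].sev;	/* first match wins */
	return -1;	/* in practice the table ends with a catch-all */
}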
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a11ae2e9e91..ad573d8baf10 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -540,6 +540,27 @@ static void mce_report_event(struct pt_regs *regs)
 	irq_work_queue(&__get_cpu_var(mce_irq_work));
 }
 
+/*
+ * Read ADDR and MISC registers.
+ */
+static void mce_read_aux(struct mce *m, int i)
+{
+	if (m->status & MCI_STATUS_MISCV)
+		m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+	if (m->status & MCI_STATUS_ADDRV) {
+		m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+
+		/*
+		 * Mask the reported address by the reported granularity.
+		 */
+		if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+			u8 shift = MCI_MISC_ADDR_LSB(m->misc);
+			m->addr >>= shift;
+			m->addr <<= shift;
+		}
+	}
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
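Note: the shift pair in mce_read_aux() deserves a worked example. On SER-capable CPUs the low bits of MCi_MISC report the least significant valid bit of the recoverable address (MCI_MISC_ADDR_LSB(m) masks out the low 6 bits in the mce.h of this era). A standalone sketch:

#include <stdio.h>
#include <stdint.h>

#define MCI_MISC_ADDR_LSB(m)	((m) & 0x3f)

int main(void)
{
	uint64_t addr = 0x12345678, misc = 0x8c;	/* LSB field = 12 */
	uint8_t shift = MCI_MISC_ADDR_LSB(misc);

	addr >>= shift;		/* drop bits below the reported granularity */
	addr <<= shift;
	printf("%#llx\n", (unsigned long long)addr);	/* prints 0x12345000 */
	return 0;
}

A 4K-granular report is thus rounded down to its page boundary, which is exactly what the recovery code needs in order to derive a pfn.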
@@ -590,10 +611,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		    (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
 			continue;
 
-		if (m.status & MCI_STATUS_MISCV)
-			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
-		if (m.status & MCI_STATUS_ADDRV)
-			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+		mce_read_aux(&m, i);
 
 		if (!(flags & MCP_TIMESTAMP))
 			m.tsc = 0;
@@ -917,6 +935,49 @@ static void mce_clear_state(unsigned long *toclear)
 }
 
 /*
+ * Need to save faulting physical address associated with a process
+ * in the machine check handler some place where we can grab it back
+ * later in mce_notify_process()
+ */
+#define	MCE_INFO_MAX	16
+
+struct mce_info {
+	atomic_t		inuse;
+	struct task_struct	*t;
+	__u64			paddr;
+} mce_info[MCE_INFO_MAX];
+
+static void mce_save_info(__u64 addr)
+{
+	struct mce_info *mi;
+
+	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
+		if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
+			mi->t = current;
+			mi->paddr = addr;
+			return;
+		}
+	}
+
+	mce_panic("Too many concurrent recoverable errors", NULL, NULL);
+}
+
+static struct mce_info *mce_find_info(void)
+{
+	struct mce_info *mi;
+
+	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
+		if (atomic_read(&mi->inuse) && mi->t == current)
+			return mi;
+	return NULL;
+}
+
+static void mce_clear_info(struct mce_info *mi)
+{
+	atomic_set(&mi->inuse, 0);
+}
+
+/*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
  *
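Note: do_machine_check() runs in machine-check context, where taking locks or allocating memory is not safe, hence the fixed-size pool claimed with atomic_cmpxchg(). The same claim/release pattern expressed in portable C11 atomics, as a user-space analog of the code above:

#include <stddef.h>
#include <stdatomic.h>

#define SLOT_MAX 16

static struct slot {
	atomic_int inuse;
	unsigned long long paddr;
} pool[SLOT_MAX];

static struct slot *claim_slot(void)
{
	struct slot *s;

	for (s = pool; s < &pool[SLOT_MAX]; s++) {
		int expected = 0;

		/* atomically flip inuse 0 -> 1; at most one claimant wins */
		if (atomic_compare_exchange_strong(&s->inuse, &expected, 1))
			return s;
	}
	return NULL;	/* pool exhausted: the kernel panics at this point */
}

static void release_slot(struct slot *s)
{
	atomic_store(&s->inuse, 0);	/* the mce_clear_info() equivalent */
}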
@@ -969,7 +1030,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	barrier();
 
 	/*
-	 * When no restart IP must always kill or panic.
+	 * When no restart IP might need to kill or panic.
+	 * Assume the worst for now, but if we find the
+	 * severity is MCE_AR_SEVERITY we have other options.
 	 */
 	if (!(m.mcgstatus & MCG_STATUS_RIPV))
 		kill_it = 1;
@@ -1023,16 +1086,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			continue;
 		}
 
-		/*
-		 * Kill on action required.
-		 */
-		if (severity == MCE_AR_SEVERITY)
-			kill_it = 1;
-
-		if (m.status & MCI_STATUS_MISCV)
-			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
-		if (m.status & MCI_STATUS_ADDRV)
-			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+		mce_read_aux(&m, i);
 
 		/*
 		 * Action optional error. Queue address for later processing.
@@ -1052,6 +1106,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		}
 	}
 
+	/* mce_clear_state will clear *final, save locally for use later */
+	m = *final;
+
 	if (!no_way_out)
 		mce_clear_state(toclear);
 
@@ -1063,27 +1120,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		no_way_out = worst >= MCE_PANIC_SEVERITY;
 
 	/*
-	 * If we have decided that we just CAN'T continue, and the user
-	 * has not set tolerant to an insane level, give up and die.
-	 *
-	 * This is mainly used in the case when the system doesn't
-	 * support MCE broadcasting or it has been disabled.
-	 */
-	if (no_way_out && tolerant < 3)
-		mce_panic("Fatal machine check on current CPU", final, msg);
-
-	/*
-	 * If the error seems to be unrecoverable, something should be
-	 * done. Try to kill as little as possible. If we can kill just
-	 * one task, do that. If the user has set the tolerance very
-	 * high, don't try to do anything at all.
+	 * At insane "tolerant" levels we take no action. Otherwise
+	 * we only die if we have no other choice. For less serious
+	 * issues we try to recover, or limit damage to the current
+	 * process.
 	 */
-
-	if (kill_it && tolerant < 3)
-		force_sig(SIGBUS, current);
-
-	/* notify userspace ASAP */
-	set_thread_flag(TIF_MCE_NOTIFY);
+	if (tolerant < 3) {
+		if (no_way_out)
+			mce_panic("Fatal machine check on current CPU", &m, msg);
+		if (worst == MCE_AR_SEVERITY) {
+			/* schedule action before return to userland */
+			mce_save_info(m.addr);
+			set_thread_flag(TIF_MCE_NOTIFY);
+		} else if (kill_it) {
+			force_sig(SIGBUS, current);
+		}
+	}
 
 	if (worst > 0)
 		mce_report_event(regs);
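Note: the restructured tail of do_machine_check() encodes an explicit policy ladder; roughly (a sketch matching the code above):

/*
 *  tolerant >= 3         -> log only, never panic or signal
 *  no_way_out            -> mce_panic()
 *  worst == AR severity  -> mce_save_info(m.addr) + TIF_MCE_NOTIFY,
 *                           i.e. attempt recovery before userland resumes
 *  else if kill_it       -> force_sig(SIGBUS, current)
 */

The local copy m = *final taken earlier is what keeps mce_panic() and mce_save_info() supplied with valid data after mce_clear_state(), which the in-line comment warns will clear *final.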
@@ -1094,34 +1146,57 @@ out:
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
-/* dummy to break dependency. actual code is in mm/memory-failure.c */
-void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+#ifndef CONFIG_MEMORY_FAILURE
+int memory_failure(unsigned long pfn, int vector, int flags)
 {
-	printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+	/* mce_severity() should not hand us an ACTION_REQUIRED error */
+	BUG_ON(flags & MF_ACTION_REQUIRED);
+	printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
+	       "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+
+	return 0;
 }
+#endif
 
 /*
- * Called after mce notification in process context. This code
- * is allowed to sleep. Call the high level VM handler to process
- * any corrupted pages.
- * Assume that the work queue code only calls this one at a time
- * per CPU.
- * Note we don't disable preemption, so this code might run on the wrong
- * CPU. In this case the event is picked up by the scheduled work queue.
- * This is merely a fast path to expedite processing in some common
- * cases.
+ * Called in process context that interrupted by MCE and marked with
+ * TIF_MCE_NOTIFY, just before returning to erroneous userland.
+ * This code is allowed to sleep.
+ * Attempt possible recovery such as calling the high level VM handler to
+ * process any corrupted pages, and kill/signal current process if required.
+ * Action required errors are handled here.
  */
 void mce_notify_process(void)
 {
 	unsigned long pfn;
-	mce_notify_irq();
-	while (mce_ring_get(&pfn))
-		memory_failure(pfn, MCE_VECTOR);
+	struct mce_info *mi = mce_find_info();
+
+	if (!mi)
+		mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
+	pfn = mi->paddr >> PAGE_SHIFT;
+
+	clear_thread_flag(TIF_MCE_NOTIFY);
+
+	pr_err("Uncorrected hardware memory error in user-access at %llx",
+		 mi->paddr);
+	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+		pr_err("Memory error not recovered");
+		force_sig(SIGBUS, current);
+	}
+	mce_clear_info(mi);
 }
 
+/*
+ * Action optional processing happens here (picking up
+ * from the list of faulting pages that do_machine_check()
+ * placed into the "ring").
+ */
 static void mce_process_work(struct work_struct *dummy)
 {
-	mce_notify_process();
+	unsigned long pfn;
+
+	while (mce_ring_get(&pfn))
+		memory_failure(pfn, MCE_VECTOR, 0);
 }
 
 #ifdef CONFIG_X86_MCE_INTEL
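Note: the action-required path is now split across two contexts; in sequence (function names from this patch):

/*
 * #MC exception context (NMI-like, may not sleep):
 *   do_machine_check()
 *     mce_save_info(m.addr);             stash paddr against current
 *     set_thread_flag(TIF_MCE_NOTIFY);
 *
 * return-to-user path, process context (may sleep):
 *   mce_notify_process()
 *     mi = mce_find_info();              recover the stashed paddr
 *     memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED);
 *     force_sig(SIGBUS, current);        only if recovery failed
 *     mce_clear_info(mi);
 */

Action-optional errors keep using the pfn ring, which mce_process_work() now drains directly instead of detouring through mce_notify_process().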
@@ -1211,8 +1286,6 @@ int mce_notify_irq(void)
 	/* Not more than two messages every minute */
 	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
 
-	clear_thread_flag(TIF_MCE_NOTIFY);
-
 	if (test_and_clear_bit(0, &mce_need_notify)) {
 		/* wake processes polling /dev/mcelog */
 		wake_up_interruptible(&mce_chrdev_wait);
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9e60dbe9fd94..7dda4f790f00 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -466,7 +466,7 @@ store_hard_offline_page(struct device *dev,
 	if (strict_strtoull(buf, 0, &pfn) < 0)
 		return -EINVAL;
 	pfn >>= PAGE_SHIFT;
-	ret = __memory_failure(pfn, 0, 0);
+	ret = memory_failure(pfn, 0, 0);
 	return ret ? ret : count;
 }
 
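Note: only the spelling of the callee changes here. The value written to hard_offline_page is a physical address, shifted down to a pfn before the call — e.g. (assuming the usual sysfs location) echo 0x200000000 > /sys/devices/system/memory/hard_offline_page poisons and offlines the page containing that address.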
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 17b27cd269c4..3dc8f6b68721 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1598,9 +1598,9 @@ void vmemmap_populate_print_last(void);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
+	MF_ACTION_REQUIRED = 1 << 1,
 };
-extern void memory_failure(unsigned long pfn, int trapno);
-extern int __memory_failure(unsigned long pfn, int trapno, int flags);
+extern int memory_failure(unsigned long pfn, int trapno, int flags);
 extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
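Note: with the two-argument wrapper gone, former callers pass an explicit flags word. A sketch of the migration for a hypothetical out-of-tree caller:

/* before: fire and forget */
memory_failure(pfn, trapno);

/* after: flags = 0 keeps the old action-optional behaviour,
 * MF_ACTION_REQUIRED selects the new synchronous path, and the
 * int return (0 = handled, < 0 = not recovered) is now checkable */
if (memory_failure(pfn, trapno, 0) < 0)
	pr_err("MCE: recovery failed for pfn %#lx\n", pfn);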
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index c7fc7fd00e32..cc448bb983ba 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -45,7 +45,7 @@ static int hwpoison_inject(void *data, u64 val)
 	 * do a racy check with elevated page count, to make sure PG_hwpoison
 	 * will only be set for the targeted owner (or on a free page).
 	 * We temporarily take page lock for try_get_mem_cgroup_from_page().
-	 * __memory_failure() will redo the check reliably inside page lock.
+	 * memory_failure() will redo the check reliably inside page lock.
 	 */
 	lock_page(hpage);
 	err = hwpoison_filter(hpage);
@@ -55,7 +55,7 @@ static int hwpoison_inject(void *data, u64 val)
 
 inject:
 	printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn);
-	return __memory_failure(pfn, 18, MF_COUNT_INCREASED);
+	return memory_failure(pfn, 18, MF_COUNT_INCREASED);
 }
 
 static int hwpoison_unpoison(void *data, u64 val)
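Note: with CONFIG_HWPOISON_INJECT this path is reachable from user space by writing a pfn to the hwpoison/corrupt-pfn debugfs file; the hard-coded trapno of 18 is the machine-check vector number.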
diff --git a/mm/madvise.c b/mm/madvise.c
index 74bf193eff04..f5ab745672b7 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -251,7 +251,7 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
 		printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
 		       page_to_pfn(p), start);
 		/* Ignore return value for now */
-		__memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
+		memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
 	}
 	return ret;
 }
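Note: madvise(MADV_HWPOISON) is the other test entry point (root-only, behind CONFIG_MEMORY_FAILURE). A self-poisoning sketch, assuming a libc that exposes the MADV_HWPOISON constant:

#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	void *p;

	if (posix_memalign(&p, pagesz, pagesz))
		return 1;
	*(volatile char *)p = 1;	/* fault the page in first */

	/* kernel treats this page as hardware-corrupted from here on */
	return madvise(p, pagesz, MADV_HWPOISON);
}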
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 56080ea36140..0f6033b01ffc 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p)
 EXPORT_SYMBOL_GPL(hwpoison_filter);
 
 /*
- * Send all the processes who have the page mapped an ``action optional''
- * signal.
+ * Send all the processes who have the page mapped a signal.
+ * ``action optional'' if they are not immediately affected by the error
+ * ``action required'' if error happened in current execution context
  */
-static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
-			unsigned long pfn, struct page *page)
+static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
+			unsigned long pfn, struct page *page, int flags)
 {
 	struct siginfo si;
 	int ret;
 
 	printk(KERN_ERR
-	       "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+	       "MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
 	       pfn, t->comm, t->pid);
 	si.si_signo = SIGBUS;
 	si.si_errno = 0;
-	si.si_code = BUS_MCEERR_AO;
 	si.si_addr = (void *)addr;
 #ifdef __ARCH_SI_TRAPNO
 	si.si_trapno = trapno;
 #endif
 	si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
-	/*
-	 * Don't use force here, it's convenient if the signal
-	 * can be temporarily blocked.
-	 * This could cause a loop when the user sets SIGBUS
-	 * to SIG_IGN, but hopefully no one will do that?
-	 */
-	ret = send_sig_info(SIGBUS, &si, t);  /* synchronous? */
+
+	if ((flags & MF_ACTION_REQUIRED) && t == current) {
+		si.si_code = BUS_MCEERR_AR;
+		ret = force_sig_info(SIGBUS, &si, t);
+	} else {
+		/*
+		 * Don't use force here, it's convenient if the signal
+		 * can be temporarily blocked.
+		 * This could cause a loop when the user sets SIGBUS
+		 * to SIG_IGN, but hopefully no one will do that?
+		 */
+		si.si_code = BUS_MCEERR_AO;
+		ret = send_sig_info(SIGBUS, &si, t);	/* synchronous? */
+	}
 	if (ret < 0)
 		printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
 		       t->comm, t->pid, ret);
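Note: the BUS_MCEERR_AR/BUS_MCEERR_AO distinction is what a recovery-aware application keys off. A sketch of the receiving side, assuming a libc that exposes the BUS_MCEERR_* codes and the si_addr_lsb field:

#define _GNU_SOURCE
#include <signal.h>
#include <string.h>
#include <unistd.h>

static void sigbus_handler(int sig, siginfo_t *si, void *uctx)
{
	if (si->si_code == BUS_MCEERR_AR) {
		/* poison consumed at si->si_addr; do not retry the access */
		_exit(1);
	} else if (si->si_code == BUS_MCEERR_AO) {
		/* advisory: (1UL << si->si_addr_lsb) bytes around si_addr
		 * are gone; a checkpointing app could recompute them */
	}
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGBUS, &sa, NULL);
	/* ... run workload that may consume poisoned memory ... */
	return 0;
}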
@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
  * Also when FAIL is set do a force kill because something went
  * wrong earlier.
  */
-static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
-			  int fail, struct page *page, unsigned long pfn)
+static void kill_procs(struct list_head *to_kill, int doit, int trapno,
+		      int fail, struct page *page, unsigned long pfn,
+		      int flags)
 {
 	struct to_kill *tk, *next;
 
@@ -363,8 +371,8 @@ static void kill_procs(struct list_head *to_kill, int doit, int trapno,
 			 * check for that, but we need to tell the
 			 * process anyways.
 			 */
-			else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
-					      pfn, page) < 0)
+			else if (kill_proc(tk->tsk, tk->addr, trapno,
+					      pfn, page, flags) < 0)
 				printk(KERN_ERR
 		"MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
 					pfn, tk->tsk->comm, tk->tsk->pid);
@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p,
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
 static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
-				  int trapno)
+				  int trapno, int flags)
 {
 	enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
 	struct address_space *mapping;
@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * use a more force-full uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
-		      ret != SWAP_SUCCESS, p, pfn);
+	kill_procs(&tokill, !!PageDirty(ppage), trapno,
+		      ret != SWAP_SUCCESS, p, pfn, flags);
 
 	return ret;
 }
@@ -984,7 +992,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage)
 	ClearPageHWPoison(hpage + i);
 }
 
-int __memory_failure(unsigned long pfn, int trapno, int flags)
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ * @flags: fine tune action taken
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks hold.
+ */
+int memory_failure(unsigned long pfn, int trapno, int flags)
 {
 	struct page_state *ps;
 	struct page *p;
@@ -1130,7 +1156,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 * Now take care of user space mappings.
 	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
 	 */
-	if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
+	if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
 		printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
 		res = -EBUSY;
 		goto out;
@@ -1156,29 +1182,7 @@ out:
 	unlock_page(hpage);
 	return res;
 }
-EXPORT_SYMBOL_GPL(__memory_failure);
-
-/**
- * memory_failure - Handle memory failure of a page.
- * @pfn: Page Number of the corrupted page
- * @trapno: Trap number reported in the signal to user space.
- *
- * This function is called by the low level machine check code
- * of an architecture when it detects hardware memory corruption
- * of a page. It tries its best to recover, which includes
- * dropping pages, killing processes etc.
- *
- * The function is primarily of use for corruptions that
- * happen outside the current execution context (e.g. when
- * detected by a background scrubber)
- *
- * Must run in process context (e.g. a work queue) with interrupts
- * enabled and no spinlocks hold.
- */
-void memory_failure(unsigned long pfn, int trapno)
-{
-	__memory_failure(pfn, trapno, 0);
-}
+EXPORT_SYMBOL_GPL(memory_failure);
 
 #define MEMORY_FAILURE_FIFO_ORDER 4
 #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
@@ -1251,7 +1255,7 @@ static void memory_failure_work_func(struct work_struct *work)
 		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
 		if (!gotten)
 			break;
-		__memory_failure(entry.pfn, entry.trapno, entry.flags);
+		memory_failure(entry.pfn, entry.trapno, entry.flags);
 	}
 }
 