author		Harvey Harrison <harvey.harrison@gmail.com>	2008-01-30 07:32:35 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-01-30 07:32:35 -0500
commit		1dc85be087d6645575847dc23c37147a2352312b (patch)
tree		f5bd8543789decca03c7d8a152fc2427af71c975 /arch
parent		b6795e65f158d12d3124379fc50ec156ae60f888 (diff)
x86: begin fault_{32|64}.c unification
Move X86_32 only get_segment_eip to X86_64
Move X86_64 only is_errata93 to X86_32

Change X86_32 loop in is_prefetch to highlight the differences
between them.  Fold the logic from __is_prefetch in as well on
X86_32.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
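[Editor's note] The approach in this patch is to make the two copies of is_prefetch() textually identical by isolating each arch-specific step behind #ifdef CONFIG_X86_32 / #else, so a later patch can merge fault_32.c and fault_64.c with a trivial diff. The standalone sketch below only illustrates that pattern; the helper names and the main() driver are placeholders for this note, not kernel code from the patch.

/*
 * Minimal sketch of the unification pattern (illustrative only):
 * one function body serves both configurations by hiding the
 * arch-specific work behind #ifdef CONFIG_X86_32 / #else, the same
 * shape this patch gives is_prefetch() in both files.  CONFIG_X86_32
 * is used purely as a compile-time switch here; build with
 * -DCONFIG_X86_32 to take the 32-bit path.  The helpers are stand-ins.
 */
#include <stdio.h>

#ifdef CONFIG_X86_32
/* 32-bit-only helper, compiled out of the 64-bit build. */
static unsigned long fetch_instruction_pointer(void)
{
	return 0x32;
}
#else
/* 64-bit-only helper, compiled out of the 32-bit build. */
static unsigned long fetch_instruction_pointer(void)
{
	return 0x64;
}
#endif

/* Shared logic: identical text in both builds, so the two source
 * files can later be merged without a functional change. */
static int scan_for_prefetch(void)
{
	unsigned long ip = fetch_instruction_pointer();

	return ip != 0;
}

int main(void)
{
	printf("scan_for_prefetch() = %d\n", scan_for_prefetch());
	return 0;
}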
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/mm/fault_32.c	 97
-rw-r--r--	arch/x86/mm/fault_64.c	124
2 files changed, 196 insertions(+), 25 deletions(-)
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 870b5610555c..300c9d8b684a 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -61,6 +61,7 @@ static inline int notify_page_fault(struct pt_regs *regs)
 #endif
 }
 
+#ifdef CONFIG_X86_32
 /*
  * Return EIP plus the CS segment base. The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
@@ -135,26 +136,61 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 		*eip_limit = seg_limit;
 	return ip + base;
 }
+#endif
 
 /*
+ * X86_32
  * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
  * Check that here and ignore it.
+ *
+ * X86_64
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner
  */
-static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
+static int is_prefetch(struct pt_regs *regs, unsigned long addr,
+		       unsigned long error_code)
 {
-	unsigned long limit;
-	unsigned char *instr = (unsigned char *)get_segment_eip(regs, &limit);
+	unsigned char *instr;
 	int scan_more = 1;
 	int prefetch = 0;
-	int i;
+	unsigned char *max_instr;
+
+#ifdef CONFIG_X86_32
+	unsigned long limit;
+	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		     boot_cpu_data.x86 >= 6)) {
+		/* Catch an obscure case of prefetch inside an NX page. */
+		if (nx_enabled && (error_code & PF_INSTR))
+			return 0;
+	} else {
+		return 0;
+	}
+	instr = (unsigned char *)get_segment_eip(regs, &limit);
+#else
+	/* If it was a exec fault ignore */
+	if (error_code & PF_INSTR)
+		return 0;
+	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+#endif
+
+	max_instr = instr + 15;
+
+#ifdef CONFIG_X86_64
+	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+		return 0;
+#endif
 
-	for (i = 0; scan_more && i < 15; i++) {
+	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
+#ifdef CONFIG_X86_32
 		if (instr > (unsigned char *)limit)
 			break;
+#endif
 		if (probe_kernel_address(instr, opcode))
 			break;
 
@@ -196,8 +232,10 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
+#ifdef CONFIG_X86_32
 			if (instr > (unsigned char *)limit)
 				break;
+#endif
 			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
@@ -211,19 +249,6 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 	return prefetch;
 }
 
-static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
-		unsigned long error_code)
-{
-	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		     boot_cpu_data.x86 >= 6)) {
-		/* Catch an obscure case of prefetch inside an NX page. */
-		if (nx_enabled && (error_code & 16))
-			return 0;
-		return __is_prefetch(regs, addr);
-	}
-	return 0;
-}
-
 static noinline void force_sig_info_fault(int si_signo, int si_code,
 					  unsigned long address, struct task_struct *tsk)
 {
@@ -274,6 +299,42 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	return pmd_k;
 }
 
+#ifdef CONFIG_X86_64
+static const char errata93_warning[] =
+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
+KERN_ERR "******* Please consider a BIOS update.\n"
+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+
+/* Workaround for K8 erratum #93 & buggy BIOS.
+   BIOS SMM functions are required to use a specific workaround
+   to avoid corruption of the 64bit RIP register on C stepping K8.
+   A lot of BIOS that didn't get tested properly miss this.
+   The OS sees this as a page fault with the upper 32bits of RIP cleared.
+   Try to work around it here.
+   Note we only handle faults in kernel here. */
+
+static int is_errata93(struct pt_regs *regs, unsigned long address)
+{
+	static int warned;
+	if (address != regs->ip)
+		return 0;
+	if ((address >> 32) != 0)
+		return 0;
+	address |= 0xffffffffUL << 32;
+	if ((address >= (u64)_stext && address <= (u64)_etext) ||
+	    (address >= MODULES_VADDR && address <= MODULES_END)) {
+		if (!warned) {
+			printk(errata93_warning);
+			warned = 1;
+		}
+		regs->ip = address;
+		return 1;
+	}
+	return 0;
+}
+#endif
+
 /*
  * Handle a fault on the vmalloc or module mapping area
  *
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 7e98a7691283..0d3d5979ce2c 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -64,32 +64,136 @@ static inline int notify_page_fault(struct pt_regs *regs)
 #endif
 }
 
-/* Sometimes the CPU reports invalid exceptions on prefetch.
-   Check that here and ignore.
-   Opcode checker based on code by Richard Brunner */
-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
-				unsigned long error_code)
+#ifdef CONFIG_X86_32
+/*
+ * Return EIP plus the CS segment base. The segment limit is also
+ * adjusted, clamped to the kernel/user address space (whichever is
+ * appropriate), and returned in *eip_limit.
+ *
+ * The segment is checked, because it might have been changed by another
+ * task between the original faulting instruction and here.
+ *
+ * If CS is no longer a valid code segment, or if EIP is beyond the
+ * limit, or if it is a kernel address when CS is not a kernel segment,
+ * then the returned value will be greater than *eip_limit.
+ *
+ * This is slow, but is very rarely executed.
+ */
+static inline unsigned long get_segment_eip(struct pt_regs *regs,
+					    unsigned long *eip_limit)
+{
+	unsigned long ip = regs->ip;
+	unsigned seg = regs->cs & 0xffff;
+	u32 seg_ar, seg_limit, base, *desc;
+
+	/* Unlikely, but must come before segment checks. */
+	if (unlikely(regs->flags & VM_MASK)) {
+		base = seg << 4;
+		*eip_limit = base + 0xffff;
+		return base + (ip & 0xffff);
+	}
+
+	/* The standard kernel/user address space limit. */
+	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
+
+	/* By far the most common cases. */
+	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
+		return ip;
+
+	/* Check the segment exists, is within the current LDT/GDT size,
+	   that kernel/user (ring 0..3) has the appropriate privilege,
+	   that it's a code segment, and get the limit. */
+	__asm__("larl %3,%0; lsll %3,%1"
+		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
+	if ((~seg_ar & 0x9800) || ip > seg_limit) {
+		*eip_limit = 0;
+		return 1;	 /* So that returned ip > *eip_limit. */
+	}
+
+	/* Get the GDT/LDT descriptor base.
+	   When you look for races in this code remember that
+	   LDT and other horrors are only used in user space. */
+	if (seg & (1<<2)) {
+		/* Must lock the LDT while reading it. */
+		mutex_lock(&current->mm->context.lock);
+		desc = current->mm->context.ldt;
+		desc = (void *)desc + (seg & ~7);
+	} else {
+		/* Must disable preemption while reading the GDT. */
+		desc = (u32 *)get_cpu_gdt_table(get_cpu());
+		desc = (void *)desc + (seg & ~7);
+	}
+
+	/* Decode the code segment base from the descriptor */
+	base = get_desc_base((struct desc_struct *)desc);
+
+	if (seg & (1<<2))
+		mutex_unlock(&current->mm->context.lock);
+	else
+		put_cpu();
+
+	/* Adjust EIP and segment limit, and clamp at the kernel limit.
+	   It's legitimate for segments to wrap at 0xffffffff. */
+	seg_limit += base;
+	if (seg_limit < *eip_limit && seg_limit >= base)
+		*eip_limit = seg_limit;
+	return ip + base;
+}
+#endif
+
+/*
+ * X86_32
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * X86_64
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner
+ */
+static int is_prefetch(struct pt_regs *regs, unsigned long addr,
+		       unsigned long error_code)
 {
 	unsigned char *instr;
 	int scan_more = 1;
 	int prefetch = 0;
 	unsigned char *max_instr;
 
+#ifdef CONFIG_X86_32
+	unsigned long limit;
+	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		     boot_cpu_data.x86 >= 6)) {
+		/* Catch an obscure case of prefetch inside an NX page. */
+		if (nx_enabled && (error_code & PF_INSTR))
+			return 0;
+	} else {
+		return 0;
+	}
+	instr = (unsigned char *)get_segment_eip(regs, &limit);
+#else
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
-
 	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+#endif
+
 	max_instr = instr + 15;
 
+#ifdef CONFIG_X86_64
 	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
 		return 0;
+#endif
 
 	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
+#ifdef CONFIG_X86_32
+		if (instr > (unsigned char *)limit)
+			break;
+#endif
 		if (probe_kernel_address(instr, opcode))
 			break;
 
@@ -125,12 +229,16 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 			scan_more = (instr_lo & 0xC) == 0x4;
 			break;
 		case 0xF0:
-			/* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
+			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
 			scan_more = !instr_lo || (instr_lo>>1) == 1;
 			break;
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
+#ifdef CONFIG_X86_32
+			if (instr > (unsigned char *)limit)
+				break;
+#endif
 			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
@@ -185,6 +293,7 @@ bad:
 	printk("BAD\n");
 }
 
+#ifdef CONFIG_X86_64
 static const char errata93_warning[] =
 KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
@@ -218,6 +327,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 	}
 	return 0;
 }
+#endif
 
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)