diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2012-03-01 02:14:45 -0500 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2012-03-08 18:55:12 -0500 |
commit | 9be72573a80648866ed0045db22d97c6e160a540 (patch) | |
tree | 5fc692504504dfbcce47489877481d3bd465ccd7 /arch/powerpc | |
parent | 9f2f79e3a3c19ae745d0439d6e0eed31df28de3c (diff) |
powerpc: Add support for page fault retry and fatal signals
Other architectures such as x86 and ARM have been growing
new support for features like retrying page faults after
dropping the mm semaphore to break contention, or being
able to return from a stuck page fault when a SIGKILL is
pending.
This refactors our implementation of do_page_fault() to
move the error handling out of line in a way similar to
x86 and adds support for those two features.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/mm/fault.c | 170 |
1 files changed, 120 insertions, 50 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7e890065cf39..19f2f9498b27 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c | |||
@@ -105,6 +105,82 @@ static int store_updates_sp(struct pt_regs *regs) | |||
105 | } | 105 | } |
106 | return 0; | 106 | return 0; |
107 | } | 107 | } |
108 | /* | ||
109 | * do_page_fault error handling helpers | ||
110 | */ | ||
111 | |||
112 | #define MM_FAULT_RETURN 0 | ||
113 | #define MM_FAULT_CONTINUE -1 | ||
114 | #define MM_FAULT_ERR(sig) (sig) | ||
115 | |||
116 | static int out_of_memory(struct pt_regs *regs) | ||
117 | { | ||
118 | /* | ||
119 | * We ran out of memory, or some other thing happened to us that made | ||
120 | * us unable to handle the page fault gracefully. | ||
121 | */ | ||
122 | up_read(&current->mm->mmap_sem); | ||
123 | if (!user_mode(regs)) | ||
124 | return MM_FAULT_ERR(SIGKILL); | ||
125 | pagefault_out_of_memory(); | ||
126 | return MM_FAULT_RETURN; | ||
127 | } | ||
128 | |||
129 | static int do_sigbus(struct pt_regs *regs, unsigned long address) | ||
130 | { | ||
131 | siginfo_t info; | ||
132 | |||
133 | up_read(&current->mm->mmap_sem); | ||
134 | |||
135 | if (user_mode(regs)) { | ||
136 | info.si_signo = SIGBUS; | ||
137 | info.si_errno = 0; | ||
138 | info.si_code = BUS_ADRERR; | ||
139 | info.si_addr = (void __user *)address; | ||
140 | force_sig_info(SIGBUS, &info, current); | ||
141 | return MM_FAULT_RETURN; | ||
142 | } | ||
143 | return MM_FAULT_ERR(SIGBUS); | ||
144 | } | ||
145 | |||
146 | static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) | ||
147 | { | ||
148 | /* | ||
149 | * Pagefault was interrupted by SIGKILL. We have no reason to | ||
150 | * continue the pagefault. | ||
151 | */ | ||
152 | if (fatal_signal_pending(current)) { | ||
153 | /* | ||
154 | * If we have retry set, the mmap semaphore will have | ||
155 | * already been released in __lock_page_or_retry(). Else | ||
156 | * we release it now. | ||
157 | */ | ||
158 | if (!(fault & VM_FAULT_RETRY)) | ||
159 | up_read(&current->mm->mmap_sem); | ||
160 | /* Coming from kernel, we need to deal with uaccess fixups */ | ||
161 | if (user_mode(regs)) | ||
162 | return MM_FAULT_RETURN; | ||
163 | return MM_FAULT_ERR(SIGKILL); | ||
164 | } | ||
165 | |||
166 | /* No fault: be happy */ | ||
167 | if (!(fault & VM_FAULT_ERROR)) | ||
168 | return MM_FAULT_CONTINUE; | ||
169 | |||
170 | /* Out of memory */ | ||
171 | if (fault & VM_FAULT_OOM) | ||
172 | return out_of_memory(regs); | ||
173 | |||
174 | /* Bus error. x86 handles HWPOISON here, we'll add this if/when | ||
175 | * we support the feature in HW | ||
176 | */ | ||
177 | if (fault & VM_FAULT_SIGBUS) | ||
178 | return do_sigbus(regs, addr); | ||
179 | |||
180 | /* We don't understand the fault code, this is fatal */ | ||
181 | BUG(); | ||
182 | return MM_FAULT_CONTINUE; | ||
183 | } | ||
108 | 184 | ||
109 | /* | 185 | /* |
110 | * For 600- and 800-family processors, the error_code parameter is DSISR | 186 | * For 600- and 800-family processors, the error_code parameter is DSISR |
@@ -124,11 +200,12 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
124 | { | 200 | { |
125 | struct vm_area_struct * vma; | 201 | struct vm_area_struct * vma; |
126 | struct mm_struct *mm = current->mm; | 202 | struct mm_struct *mm = current->mm; |
127 | siginfo_t info; | 203 | unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; |
128 | int code = SEGV_MAPERR; | 204 | int code = SEGV_MAPERR; |
129 | int is_write = 0, ret; | 205 | int is_write = 0; |
130 | int trap = TRAP(regs); | 206 | int trap = TRAP(regs); |
131 | int is_exec = trap == 0x400; | 207 | int is_exec = trap == 0x400; |
208 | int fault; | ||
132 | 209 | ||
133 | #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) | 210 | #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) |
134 | /* | 211 | /* |
@@ -145,6 +222,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
145 | is_write = error_code & ESR_DST; | 222 | is_write = error_code & ESR_DST; |
146 | #endif /* CONFIG_4xx || CONFIG_BOOKE */ | 223 | #endif /* CONFIG_4xx || CONFIG_BOOKE */ |
147 | 224 | ||
225 | if (is_write) | ||
226 | flags |= FAULT_FLAG_WRITE; | ||
227 | |||
148 | #ifdef CONFIG_PPC_ICSWX | 228 | #ifdef CONFIG_PPC_ICSWX |
149 | /* | 229 | /* |
150 | * we need to do this early because this "data storage | 230 | * we need to do this early because this "data storage |
@@ -152,13 +232,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
152 | * look at it | 232 | * look at it |
153 | */ | 233 | */ |
154 | if (error_code & ICSWX_DSI_UCT) { | 234 | if (error_code & ICSWX_DSI_UCT) { |
155 | int ret; | 235 | int rc = acop_handle_fault(regs, address, error_code); |
156 | 236 | if (rc) | |
157 | ret = acop_handle_fault(regs, address, error_code); | 237 | return rc; |
158 | if (ret) | ||
159 | return ret; | ||
160 | } | 238 | } |
161 | #endif | 239 | #endif /* CONFIG_PPC_ICSWX */ |
162 | 240 | ||
163 | if (notify_page_fault(regs)) | 241 | if (notify_page_fault(regs)) |
164 | return 0; | 242 | return 0; |
@@ -216,6 +294,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
216 | if (!user_mode(regs) && !search_exception_tables(regs->nip)) | 294 | if (!user_mode(regs) && !search_exception_tables(regs->nip)) |
217 | goto bad_area_nosemaphore; | 295 | goto bad_area_nosemaphore; |
218 | 296 | ||
297 | retry: | ||
219 | down_read(&mm->mmap_sem); | 298 | down_read(&mm->mmap_sem); |
220 | } else { | 299 | } else { |
221 | /* | 300 | /* |
@@ -338,30 +417,43 @@ good_area: | |||
338 | * make sure we exit gracefully rather than endlessly redo | 417 | * make sure we exit gracefully rather than endlessly redo |
339 | * the fault. | 418 | * the fault. |
340 | */ | 419 | */ |
341 | ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); | 420 | fault = handle_mm_fault(mm, vma, address, flags); |
342 | if (unlikely(ret & VM_FAULT_ERROR)) { | 421 | if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { |
343 | if (ret & VM_FAULT_OOM) | 422 | int rc = mm_fault_error(regs, address, fault); |
344 | goto out_of_memory; | 423 | if (rc >= MM_FAULT_RETURN) |
345 | else if (ret & VM_FAULT_SIGBUS) | 424 | return rc; |
346 | goto do_sigbus; | ||
347 | BUG(); | ||
348 | } | 425 | } |
349 | if (ret & VM_FAULT_MAJOR) { | 426 | |
350 | current->maj_flt++; | 427 | /* |
351 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, | 428 | * Major/minor page fault accounting is only done on the |
352 | regs, address); | 429 | * initial attempt. If we go through a retry, it is extremely |
430 | * likely that the page will be found in page cache at that point. | ||
431 | */ | ||
432 | if (flags & FAULT_FLAG_ALLOW_RETRY) { | ||
433 | if (fault & VM_FAULT_MAJOR) { | ||
434 | current->maj_flt++; | ||
435 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, | ||
436 | regs, address); | ||
353 | #ifdef CONFIG_PPC_SMLPAR | 437 | #ifdef CONFIG_PPC_SMLPAR |
354 | if (firmware_has_feature(FW_FEATURE_CMO)) { | 438 | if (firmware_has_feature(FW_FEATURE_CMO)) { |
355 | preempt_disable(); | 439 | preempt_disable(); |
356 | get_lppaca()->page_ins += (1 << PAGE_FACTOR); | 440 | get_lppaca()->page_ins += (1 << PAGE_FACTOR); |
357 | preempt_enable(); | 441 | preempt_enable(); |
442 | } | ||
443 | #endif /* CONFIG_PPC_SMLPAR */ | ||
444 | } else { | ||
445 | current->min_flt++; | ||
446 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, | ||
447 | regs, address); | ||
448 | } | ||
449 | if (fault & VM_FAULT_RETRY) { | ||
450 | /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk | ||
451 | * of starvation. */ | ||
452 | flags &= ~FAULT_FLAG_ALLOW_RETRY; | ||
453 | goto retry; | ||
358 | } | 454 | } |
359 | #endif | ||
360 | } else { | ||
361 | current->min_flt++; | ||
362 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, | ||
363 | regs, address); | ||
364 | } | 455 | } |
456 | |||
365 | up_read(&mm->mmap_sem); | 457 | up_read(&mm->mmap_sem); |
366 | return 0; | 458 | return 0; |
367 | 459 | ||
@@ -382,28 +474,6 @@ bad_area_nosemaphore: | |||
382 | 474 | ||
383 | return SIGSEGV; | 475 | return SIGSEGV; |
384 | 476 | ||
385 | /* | ||
386 | * We ran out of memory, or some other thing happened to us that made | ||
387 | * us unable to handle the page fault gracefully. | ||
388 | */ | ||
389 | out_of_memory: | ||
390 | up_read(&mm->mmap_sem); | ||
391 | if (!user_mode(regs)) | ||
392 | return SIGKILL; | ||
393 | pagefault_out_of_memory(); | ||
394 | return 0; | ||
395 | |||
396 | do_sigbus: | ||
397 | up_read(&mm->mmap_sem); | ||
398 | if (user_mode(regs)) { | ||
399 | info.si_signo = SIGBUS; | ||
400 | info.si_errno = 0; | ||
401 | info.si_code = BUS_ADRERR; | ||
402 | info.si_addr = (void __user *)address; | ||
403 | force_sig_info(SIGBUS, &info, current); | ||
404 | return 0; | ||
405 | } | ||
406 | return SIGBUS; | ||
407 | } | 477 | } |
408 | 478 | ||
409 | /* | 479 | /* |