aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-03-01 02:14:45 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-03-08 18:55:12 -0500
commit9be72573a80648866ed0045db22d97c6e160a540 (patch)
tree5fc692504504dfbcce47489877481d3bd465ccd7 /arch/powerpc
parent9f2f79e3a3c19ae745d0439d6e0eed31df28de3c (diff)
powerpc: Add support for page fault retry and fatal signals
Other architectures such as x86 and ARM have been growing new support for features like retrying page faults after dropping the mm semaphore to break contention, or being able to return from a stuck page fault when a SIGKILL is pending. This refactors our implementation of do_page_fault() to move the error handling out of line in a way similar to x86 and adds support for those two features. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/mm/fault.c170
1 files changed, 120 insertions, 50 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7e890065cf39..19f2f9498b27 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -105,6 +105,82 @@ static int store_updates_sp(struct pt_regs *regs)
105 } 105 }
106 return 0; 106 return 0;
107} 107}
108/*
109 * do_page_fault error handling helpers
110 */
111
112#define MM_FAULT_RETURN 0
113#define MM_FAULT_CONTINUE -1
114#define MM_FAULT_ERR(sig) (sig)
115
116static int out_of_memory(struct pt_regs *regs)
117{
118 /*
119 * We ran out of memory, or some other thing happened to us that made
120 * us unable to handle the page fault gracefully.
121 */
122 up_read(&current->mm->mmap_sem);
123 if (!user_mode(regs))
124 return MM_FAULT_ERR(SIGKILL);
125 pagefault_out_of_memory();
126 return MM_FAULT_RETURN;
127}
128
129static int do_sigbus(struct pt_regs *regs, unsigned long address)
130{
131 siginfo_t info;
132
133 up_read(&current->mm->mmap_sem);
134
135 if (user_mode(regs)) {
136 info.si_signo = SIGBUS;
137 info.si_errno = 0;
138 info.si_code = BUS_ADRERR;
139 info.si_addr = (void __user *)address;
140 force_sig_info(SIGBUS, &info, current);
141 return MM_FAULT_RETURN;
142 }
143 return MM_FAULT_ERR(SIGBUS);
144}
145
146static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
147{
148 /*
149 * Pagefault was interrupted by SIGKILL. We have no reason to
150 * continue the pagefault.
151 */
152 if (fatal_signal_pending(current)) {
153 /*
154 * If we have retry set, the mmap semaphore will have
155 * alrady been released in __lock_page_or_retry(). Else
156 * we release it now.
157 */
158 if (!(fault & VM_FAULT_RETRY))
159 up_read(&current->mm->mmap_sem);
160 /* Coming from kernel, we need to deal with uaccess fixups */
161 if (user_mode(regs))
162 return MM_FAULT_RETURN;
163 return MM_FAULT_ERR(SIGKILL);
164 }
165
166 /* No fault: be happy */
167 if (!(fault & VM_FAULT_ERROR))
168 return MM_FAULT_CONTINUE;
169
170 /* Out of memory */
171 if (fault & VM_FAULT_OOM)
172 return out_of_memory(regs);
173
174 /* Bus error. x86 handles HWPOISON here, we'll add this if/when
175 * we support the feature in HW
176 */
177 if (fault & VM_FAULT_SIGBUS)
178 return do_sigbus(regs, addr);
179
180 /* We don't understand the fault code, this is fatal */
181 BUG();
182 return MM_FAULT_CONTINUE;
183}
108 184
109/* 185/*
110 * For 600- and 800-family processors, the error_code parameter is DSISR 186 * For 600- and 800-family processors, the error_code parameter is DSISR
@@ -124,11 +200,12 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
124{ 200{
125 struct vm_area_struct * vma; 201 struct vm_area_struct * vma;
126 struct mm_struct *mm = current->mm; 202 struct mm_struct *mm = current->mm;
127 siginfo_t info; 203 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
128 int code = SEGV_MAPERR; 204 int code = SEGV_MAPERR;
129 int is_write = 0, ret; 205 int is_write = 0;
130 int trap = TRAP(regs); 206 int trap = TRAP(regs);
131 int is_exec = trap == 0x400; 207 int is_exec = trap == 0x400;
208 int fault;
132 209
133#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) 210#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
134 /* 211 /*
@@ -145,6 +222,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
145 is_write = error_code & ESR_DST; 222 is_write = error_code & ESR_DST;
146#endif /* CONFIG_4xx || CONFIG_BOOKE */ 223#endif /* CONFIG_4xx || CONFIG_BOOKE */
147 224
225 if (is_write)
226 flags |= FAULT_FLAG_WRITE;
227
148#ifdef CONFIG_PPC_ICSWX 228#ifdef CONFIG_PPC_ICSWX
149 /* 229 /*
150 * we need to do this early because this "data storage 230 * we need to do this early because this "data storage
@@ -152,13 +232,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
152 * look at it 232 * look at it
153 */ 233 */
154 if (error_code & ICSWX_DSI_UCT) { 234 if (error_code & ICSWX_DSI_UCT) {
155 int ret; 235 int rc = acop_handle_fault(regs, address, error_code);
156 236 if (rc)
157 ret = acop_handle_fault(regs, address, error_code); 237 return rc;
158 if (ret)
159 return ret;
160 } 238 }
161#endif 239#endif /* CONFIG_PPC_ICSWX */
162 240
163 if (notify_page_fault(regs)) 241 if (notify_page_fault(regs))
164 return 0; 242 return 0;
@@ -216,6 +294,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
216 if (!user_mode(regs) && !search_exception_tables(regs->nip)) 294 if (!user_mode(regs) && !search_exception_tables(regs->nip))
217 goto bad_area_nosemaphore; 295 goto bad_area_nosemaphore;
218 296
297retry:
219 down_read(&mm->mmap_sem); 298 down_read(&mm->mmap_sem);
220 } else { 299 } else {
221 /* 300 /*
@@ -338,30 +417,43 @@ good_area:
338 * make sure we exit gracefully rather than endlessly redo 417 * make sure we exit gracefully rather than endlessly redo
339 * the fault. 418 * the fault.
340 */ 419 */
341 ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); 420 fault = handle_mm_fault(mm, vma, address, flags);
342 if (unlikely(ret & VM_FAULT_ERROR)) { 421 if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
343 if (ret & VM_FAULT_OOM) 422 int rc = mm_fault_error(regs, address, fault);
344 goto out_of_memory; 423 if (rc >= MM_FAULT_RETURN)
345 else if (ret & VM_FAULT_SIGBUS) 424 return rc;
346 goto do_sigbus;
347 BUG();
348 } 425 }
349 if (ret & VM_FAULT_MAJOR) { 426
350 current->maj_flt++; 427 /*
351 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 428 * Major/minor page fault accounting is only done on the
352 regs, address); 429 * initial attempt. If we go through a retry, it is extremely
430 * likely that the page will be found in page cache at that point.
431 */
432 if (flags & FAULT_FLAG_ALLOW_RETRY) {
433 if (fault & VM_FAULT_MAJOR) {
434 current->maj_flt++;
435 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
436 regs, address);
353#ifdef CONFIG_PPC_SMLPAR 437#ifdef CONFIG_PPC_SMLPAR
354 if (firmware_has_feature(FW_FEATURE_CMO)) { 438 if (firmware_has_feature(FW_FEATURE_CMO)) {
355 preempt_disable(); 439 preempt_disable();
356 get_lppaca()->page_ins += (1 << PAGE_FACTOR); 440 get_lppaca()->page_ins += (1 << PAGE_FACTOR);
357 preempt_enable(); 441 preempt_enable();
442 }
443#endif /* CONFIG_PPC_SMLPAR */
444 } else {
445 current->min_flt++;
446 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
447 regs, address);
448 }
449 if (fault & VM_FAULT_RETRY) {
450 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
451 * of starvation. */
452 flags &= ~FAULT_FLAG_ALLOW_RETRY;
453 goto retry;
358 } 454 }
359#endif
360 } else {
361 current->min_flt++;
362 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
363 regs, address);
364 } 455 }
456
365 up_read(&mm->mmap_sem); 457 up_read(&mm->mmap_sem);
366 return 0; 458 return 0;
367 459
@@ -382,28 +474,6 @@ bad_area_nosemaphore:
382 474
383 return SIGSEGV; 475 return SIGSEGV;
384 476
385/*
386 * We ran out of memory, or some other thing happened to us that made
387 * us unable to handle the page fault gracefully.
388 */
389out_of_memory:
390 up_read(&mm->mmap_sem);
391 if (!user_mode(regs))
392 return SIGKILL;
393 pagefault_out_of_memory();
394 return 0;
395
396do_sigbus:
397 up_read(&mm->mmap_sem);
398 if (user_mode(regs)) {
399 info.si_signo = SIGBUS;
400 info.si_errno = 0;
401 info.si_code = BUS_ADRERR;
402 info.si_addr = (void __user *)address;
403 force_sig_info(SIGBUS, &info, current);
404 return 0;
405 }
406 return SIGBUS;
407} 477}
408 478
409/* 479/*