author     Glenn Elliott <gelliott@cs.unc.edu>        2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>        2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/s390/mm
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/Makefile       |   3
-rw-r--r--  arch/s390/mm/cmm.c          |   9
-rw-r--r--  arch/s390/mm/extmem.c       |   6
-rw-r--r--  arch/s390/mm/fault.c        | 321
-rw-r--r--  arch/s390/mm/gup.c          | 224
-rw-r--r--  arch/s390/mm/hugetlbpage.c  |  12
-rw-r--r--  arch/s390/mm/init.c         |  60
-rw-r--r--  arch/s390/mm/maccess.c      |   8
-rw-r--r--  arch/s390/mm/mmap.c         |  49
-rw-r--r--  arch/s390/mm/pageattr.c     |  61
-rw-r--r--  arch/s390/mm/pgtable.c      | 238
-rw-r--r--  arch/s390/mm/vmem.c         |  14
12 files changed, 712 insertions, 293 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index eec054484419..d98fe9004a52 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -3,6 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ | 5 | obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ |
6 | page-states.o | 6 | page-states.o gup.o |
7 | obj-$(CONFIG_CMM) += cmm.o | 7 | obj-$(CONFIG_CMM) += cmm.o |
8 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 8 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o |
9 | obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o | ||
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index a9550dca3e4b..1f1dba9dcf58 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -23,7 +23,10 @@ | |||
23 | #include <asm/pgalloc.h> | 23 | #include <asm/pgalloc.h> |
24 | #include <asm/diag.h> | 24 | #include <asm/diag.h> |
25 | 25 | ||
26 | static char *sender = "VMRMSVM"; | 26 | #ifdef CONFIG_CMM_IUCV |
27 | static char *cmm_default_sender = "VMRMSVM"; | ||
28 | #endif | ||
29 | static char *sender; | ||
27 | module_param(sender, charp, 0400); | 30 | module_param(sender, charp, 0400); |
28 | MODULE_PARM_DESC(sender, | 31 | MODULE_PARM_DESC(sender, |
29 | "Guest name that may send SMSG messages (default VMRMSVM)"); | 32 | "Guest name that may send SMSG messages (default VMRMSVM)"); |
@@ -88,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter, | |||
88 | } else | 91 | } else |
89 | free_page((unsigned long) npa); | 92 | free_page((unsigned long) npa); |
90 | } | 93 | } |
91 | diag10(addr); | 94 | diag10_range(addr >> PAGE_SHIFT, 1); |
92 | pa->pages[pa->index++] = addr; | 95 | pa->pages[pa->index++] = addr; |
93 | (*counter)++; | 96 | (*counter)++; |
94 | spin_unlock(&cmm_lock); | 97 | spin_unlock(&cmm_lock); |
@@ -440,6 +443,8 @@ static int __init cmm_init(void) | |||
440 | int len = strlen(sender); | 443 | int len = strlen(sender); |
441 | while (len--) | 444 | while (len--) |
442 | sender[len] = toupper(sender[len]); | 445 | sender[len] = toupper(sender[len]); |
446 | } else { | ||
447 | sender = cmm_default_sender; | ||
443 | } | 448 | } |
444 | 449 | ||
445 | rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); | 450 | rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); |
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 3cc95dd0a3a6..075ddada4911 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -412,6 +412,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long | |||
412 | struct dcss_segment *seg; | 412 | struct dcss_segment *seg; |
413 | int rc, diag_cc; | 413 | int rc, diag_cc; |
414 | 414 | ||
415 | start_addr = end_addr = 0; | ||
415 | seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); | 416 | seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); |
416 | if (seg == NULL) { | 417 | if (seg == NULL) { |
417 | rc = -ENOMEM; | 418 | rc = -ENOMEM; |
@@ -573,6 +574,7 @@ segment_modify_shared (char *name, int do_nonshared) | |||
573 | unsigned long start_addr, end_addr, dummy; | 574 | unsigned long start_addr, end_addr, dummy; |
574 | int rc, diag_cc; | 575 | int rc, diag_cc; |
575 | 576 | ||
577 | start_addr = end_addr = 0; | ||
576 | mutex_lock(&dcss_lock); | 578 | mutex_lock(&dcss_lock); |
577 | seg = segment_by_name (name); | 579 | seg = segment_by_name (name); |
578 | if (seg == NULL) { | 580 | if (seg == NULL) { |
@@ -681,8 +683,6 @@ void | |||
681 | segment_save(char *name) | 683 | segment_save(char *name) |
682 | { | 684 | { |
683 | struct dcss_segment *seg; | 685 | struct dcss_segment *seg; |
684 | int startpfn = 0; | ||
685 | int endpfn = 0; | ||
686 | char cmd1[160]; | 686 | char cmd1[160]; |
687 | char cmd2[80]; | 687 | char cmd2[80]; |
688 | int i, response; | 688 | int i, response; |
@@ -698,8 +698,6 @@ segment_save(char *name) | |||
698 | goto out; | 698 | goto out; |
699 | } | 699 | } |
700 | 700 | ||
701 | startpfn = seg->start_addr >> PAGE_SHIFT; | ||
702 | endpfn = (seg->end) >> PAGE_SHIFT; | ||
703 | sprintf(cmd1, "DEFSEG %s", name); | 701 | sprintf(cmd1, "DEFSEG %s", name); |
704 | for (i=0; i<seg->segcnt; i++) { | 702 | for (i=0; i<seg->segcnt; i++) { |
705 | sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", | 703 | sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", |
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2505b2ea0ef1..fe103e891e7a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -10,6 +10,7 @@ | |||
10 | * Copyright (C) 1995 Linus Torvalds | 10 | * Copyright (C) 1995 Linus Torvalds |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/kernel_stat.h> | ||
13 | #include <linux/perf_event.h> | 14 | #include <linux/perf_event.h> |
14 | #include <linux/signal.h> | 15 | #include <linux/signal.h> |
15 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
@@ -33,7 +34,7 @@ | |||
33 | #include <asm/asm-offsets.h> | 34 | #include <asm/asm-offsets.h> |
34 | #include <asm/system.h> | 35 | #include <asm/system.h> |
35 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
36 | #include <asm/s390_ext.h> | 37 | #include <asm/irq.h> |
37 | #include <asm/mmu_context.h> | 38 | #include <asm/mmu_context.h> |
38 | #include <asm/compat.h> | 39 | #include <asm/compat.h> |
39 | #include "../kernel/entry.h" | 40 | #include "../kernel/entry.h" |
@@ -52,6 +53,14 @@ | |||
52 | #define VM_FAULT_BADMAP 0x020000 | 53 | #define VM_FAULT_BADMAP 0x020000 |
53 | #define VM_FAULT_BADACCESS 0x040000 | 54 | #define VM_FAULT_BADACCESS 0x040000 |
54 | 55 | ||
56 | static unsigned long store_indication; | ||
57 | |||
58 | void fault_init(void) | ||
59 | { | ||
60 | if (test_facility(2) && test_facility(75)) | ||
61 | store_indication = 0xc00; | ||
62 | } | ||
63 | |||
55 | static inline int notify_page_fault(struct pt_regs *regs) | 64 | static inline int notify_page_fault(struct pt_regs *regs) |
56 | { | 65 | { |
57 | int ret = 0; | 66 | int ret = 0; |
@@ -199,42 +208,22 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, | |||
199 | unsigned long trans_exc_code) | 208 | unsigned long trans_exc_code) |
200 | { | 209 | { |
201 | struct task_struct *tsk = current; | 210 | struct task_struct *tsk = current; |
211 | unsigned long address; | ||
212 | struct siginfo si; | ||
202 | 213 | ||
203 | /* | 214 | /* |
204 | * Send a sigbus, regardless of whether we were in kernel | 215 | * Send a sigbus, regardless of whether we were in kernel |
205 | * or user mode. | 216 | * or user mode. |
206 | */ | 217 | */ |
207 | tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; | 218 | address = trans_exc_code & __FAIL_ADDR_MASK; |
219 | tsk->thread.prot_addr = address; | ||
208 | tsk->thread.trap_no = int_code; | 220 | tsk->thread.trap_no = int_code; |
209 | force_sig(SIGBUS, tsk); | 221 | si.si_signo = SIGBUS; |
210 | } | 222 | si.si_errno = 0; |
211 | 223 | si.si_code = BUS_ADRERR; | |
212 | #ifdef CONFIG_S390_EXEC_PROTECT | 224 | si.si_addr = (void __user *) address; |
213 | static noinline int signal_return(struct pt_regs *regs, long int_code, | 225 | force_sig_info(SIGBUS, &si, tsk); |
214 | unsigned long trans_exc_code) | ||
215 | { | ||
216 | u16 instruction; | ||
217 | int rc; | ||
218 | |||
219 | rc = __get_user(instruction, (u16 __user *) regs->psw.addr); | ||
220 | |||
221 | if (!rc && instruction == 0x0a77) { | ||
222 | clear_tsk_thread_flag(current, TIF_SINGLE_STEP); | ||
223 | if (is_compat_task()) | ||
224 | sys32_sigreturn(); | ||
225 | else | ||
226 | sys_sigreturn(); | ||
227 | } else if (!rc && instruction == 0x0aad) { | ||
228 | clear_tsk_thread_flag(current, TIF_SINGLE_STEP); | ||
229 | if (is_compat_task()) | ||
230 | sys32_rt_sigreturn(); | ||
231 | else | ||
232 | sys_rt_sigreturn(); | ||
233 | } else | ||
234 | do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); | ||
235 | return 0; | ||
236 | } | 226 | } |
237 | #endif /* CONFIG_S390_EXEC_PROTECT */ | ||
238 | 227 | ||
239 | static noinline void do_fault_error(struct pt_regs *regs, long int_code, | 228 | static noinline void do_fault_error(struct pt_regs *regs, long int_code, |
240 | unsigned long trans_exc_code, int fault) | 229 | unsigned long trans_exc_code, int fault) |
@@ -243,13 +232,6 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, | |||
243 | 232 | ||
244 | switch (fault) { | 233 | switch (fault) { |
245 | case VM_FAULT_BADACCESS: | 234 | case VM_FAULT_BADACCESS: |
246 | #ifdef CONFIG_S390_EXEC_PROTECT | ||
247 | if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && | ||
248 | (trans_exc_code & 3) == 0) { | ||
249 | signal_return(regs, int_code, trans_exc_code); | ||
250 | break; | ||
251 | } | ||
252 | #endif /* CONFIG_S390_EXEC_PROTECT */ | ||
253 | case VM_FAULT_BADMAP: | 235 | case VM_FAULT_BADMAP: |
254 | /* Bad memory access. Check if it is kernel or user space. */ | 236 | /* Bad memory access. Check if it is kernel or user space. */ |
255 | if (regs->psw.mask & PSW_MASK_PSTATE) { | 237 | if (regs->psw.mask & PSW_MASK_PSTATE) { |
@@ -263,13 +245,17 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, | |||
263 | do_no_context(regs, int_code, trans_exc_code); | 245 | do_no_context(regs, int_code, trans_exc_code); |
264 | break; | 246 | break; |
265 | default: /* fault & VM_FAULT_ERROR */ | 247 | default: /* fault & VM_FAULT_ERROR */ |
266 | if (fault & VM_FAULT_OOM) | 248 | if (fault & VM_FAULT_OOM) { |
267 | pagefault_out_of_memory(); | 249 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) |
268 | else if (fault & VM_FAULT_SIGBUS) { | 250 | do_no_context(regs, int_code, trans_exc_code); |
269 | do_sigbus(regs, int_code, trans_exc_code); | 251 | else |
252 | pagefault_out_of_memory(); | ||
253 | } else if (fault & VM_FAULT_SIGBUS) { | ||
270 | /* Kernel mode? Handle exceptions or die */ | 254 | /* Kernel mode? Handle exceptions or die */ |
271 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) | 255 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) |
272 | do_no_context(regs, int_code, trans_exc_code); | 256 | do_no_context(regs, int_code, trans_exc_code); |
257 | else | ||
258 | do_sigbus(regs, int_code, trans_exc_code); | ||
273 | } else | 259 | } else |
274 | BUG(); | 260 | BUG(); |
275 | break; | 261 | break; |
@@ -294,6 +280,7 @@ static inline int do_exception(struct pt_regs *regs, int access, | |||
294 | struct mm_struct *mm; | 280 | struct mm_struct *mm; |
295 | struct vm_area_struct *vma; | 281 | struct vm_area_struct *vma; |
296 | unsigned long address; | 282 | unsigned long address; |
283 | unsigned int flags; | ||
297 | int fault; | 284 | int fault; |
298 | 285 | ||
299 | if (notify_page_fault(regs)) | 286 | if (notify_page_fault(regs)) |
@@ -312,13 +299,11 @@ static inline int do_exception(struct pt_regs *regs, int access, | |||
312 | goto out; | 299 | goto out; |
313 | 300 | ||
314 | address = trans_exc_code & __FAIL_ADDR_MASK; | 301 | address = trans_exc_code & __FAIL_ADDR_MASK; |
315 | /* | ||
316 | * When we get here, the fault happened in the current | ||
317 | * task's user address space, so we can switch on the | ||
318 | * interrupts again and then search the VMAs | ||
319 | */ | ||
320 | local_irq_enable(); | ||
321 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); | 302 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); |
303 | flags = FAULT_FLAG_ALLOW_RETRY; | ||
304 | if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) | ||
305 | flags |= FAULT_FLAG_WRITE; | ||
306 | retry: | ||
322 | down_read(&mm->mmap_sem); | 307 | down_read(&mm->mmap_sem); |
323 | 308 | ||
324 | fault = VM_FAULT_BADMAP; | 309 | fault = VM_FAULT_BADMAP; |
@@ -348,25 +333,37 @@ static inline int do_exception(struct pt_regs *regs, int access, | |||
348 | * make sure we exit gracefully rather than endlessly redo | 333 | * make sure we exit gracefully rather than endlessly redo |
349 | * the fault. | 334 | * the fault. |
350 | */ | 335 | */ |
351 | fault = handle_mm_fault(mm, vma, address, | 336 | fault = handle_mm_fault(mm, vma, address, flags); |
352 | (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0); | ||
353 | if (unlikely(fault & VM_FAULT_ERROR)) | 337 | if (unlikely(fault & VM_FAULT_ERROR)) |
354 | goto out_up; | 338 | goto out_up; |
355 | 339 | ||
356 | if (fault & VM_FAULT_MAJOR) { | 340 | /* |
357 | tsk->maj_flt++; | 341 | * Major/minor page fault accounting is only done on the |
358 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, | 342 | * initial attempt. If we go through a retry, it is extremely |
359 | regs, address); | 343 | * likely that the page will be found in page cache at that point. |
360 | } else { | 344 | */ |
361 | tsk->min_flt++; | 345 | if (flags & FAULT_FLAG_ALLOW_RETRY) { |
362 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | 346 | if (fault & VM_FAULT_MAJOR) { |
363 | regs, address); | 347 | tsk->maj_flt++; |
348 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, | ||
349 | regs, address); | ||
350 | } else { | ||
351 | tsk->min_flt++; | ||
352 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | ||
353 | regs, address); | ||
354 | } | ||
355 | if (fault & VM_FAULT_RETRY) { | ||
356 | /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk | ||
357 | * of starvation. */ | ||
358 | flags &= ~FAULT_FLAG_ALLOW_RETRY; | ||
359 | goto retry; | ||
360 | } | ||
364 | } | 361 | } |
365 | /* | 362 | /* |
366 | * The instruction that caused the program check will | 363 | * The instruction that caused the program check will |
367 | * be repeated. Don't signal single step via SIGTRAP. | 364 | * be repeated. Don't signal single step via SIGTRAP. |
368 | */ | 365 | */ |
369 | clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP); | 366 | clear_tsk_thread_flag(tsk, TIF_PER_TRAP); |
370 | fault = 0; | 367 | fault = 0; |
371 | out_up: | 368 | out_up: |
372 | up_read(&mm->mmap_sem); | 369 | up_read(&mm->mmap_sem); |
@@ -374,20 +371,20 @@ out: | |||
374 | return fault; | 371 | return fault; |
375 | } | 372 | } |
376 | 373 | ||
377 | void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) | 374 | void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, |
375 | unsigned long trans_exc_code) | ||
378 | { | 376 | { |
379 | unsigned long trans_exc_code = S390_lowcore.trans_exc_code; | ||
380 | int fault; | 377 | int fault; |
381 | 378 | ||
382 | /* Protection exception is supressing, decrement psw address. */ | 379 | /* Protection exception is suppressing, decrement psw address. */ |
383 | regs->psw.addr -= (int_code >> 16); | 380 | regs->psw.addr -= (pgm_int_code >> 16); |
384 | /* | 381 | /* |
385 | * Check for low-address protection. This needs to be treated | 382 | * Check for low-address protection. This needs to be treated |
386 | * as a special case because the translation exception code | 383 | * as a special case because the translation exception code |
387 | * field is not guaranteed to contain valid data in this case. | 384 | * field is not guaranteed to contain valid data in this case. |
388 | */ | 385 | */ |
389 | if (unlikely(!(trans_exc_code & 4))) { | 386 | if (unlikely(!(trans_exc_code & 4))) { |
390 | do_low_address(regs, int_code, trans_exc_code); | 387 | do_low_address(regs, pgm_int_code, trans_exc_code); |
391 | return; | 388 | return; |
392 | } | 389 | } |
393 | fault = do_exception(regs, VM_WRITE, trans_exc_code); | 390 | fault = do_exception(regs, VM_WRITE, trans_exc_code); |
@@ -395,34 +392,27 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) | |||
395 | do_fault_error(regs, 4, trans_exc_code, fault); | 392 | do_fault_error(regs, 4, trans_exc_code, fault); |
396 | } | 393 | } |
397 | 394 | ||
398 | void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) | 395 | void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, |
396 | unsigned long trans_exc_code) | ||
399 | { | 397 | { |
400 | unsigned long trans_exc_code = S390_lowcore.trans_exc_code; | ||
401 | int access, fault; | 398 | int access, fault; |
402 | 399 | ||
403 | access = VM_READ | VM_EXEC | VM_WRITE; | 400 | access = VM_READ | VM_EXEC | VM_WRITE; |
404 | #ifdef CONFIG_S390_EXEC_PROTECT | ||
405 | if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && | ||
406 | (trans_exc_code & 3) == 0) | ||
407 | access = VM_EXEC; | ||
408 | #endif | ||
409 | fault = do_exception(regs, access, trans_exc_code); | 401 | fault = do_exception(regs, access, trans_exc_code); |
410 | if (unlikely(fault)) | 402 | if (unlikely(fault)) |
411 | do_fault_error(regs, int_code & 255, trans_exc_code, fault); | 403 | do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); |
412 | } | 404 | } |
413 | 405 | ||
414 | #ifdef CONFIG_64BIT | 406 | #ifdef CONFIG_64BIT |
415 | void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) | 407 | void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, |
408 | unsigned long trans_exc_code) | ||
416 | { | 409 | { |
417 | unsigned long trans_exc_code = S390_lowcore.trans_exc_code; | ||
418 | struct mm_struct *mm = current->mm; | 410 | struct mm_struct *mm = current->mm; |
419 | struct vm_area_struct *vma; | 411 | struct vm_area_struct *vma; |
420 | 412 | ||
421 | if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) | 413 | if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) |
422 | goto no_context; | 414 | goto no_context; |
423 | 415 | ||
424 | local_irq_enable(); | ||
425 | |||
426 | down_read(&mm->mmap_sem); | 416 | down_read(&mm->mmap_sem); |
427 | vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); | 417 | vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); |
428 | up_read(&mm->mmap_sem); | 418 | up_read(&mm->mmap_sem); |
@@ -434,16 +424,16 @@ void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) | |||
434 | 424 | ||
435 | /* User mode accesses just cause a SIGSEGV */ | 425 | /* User mode accesses just cause a SIGSEGV */ |
436 | if (regs->psw.mask & PSW_MASK_PSTATE) { | 426 | if (regs->psw.mask & PSW_MASK_PSTATE) { |
437 | do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); | 427 | do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code); |
438 | return; | 428 | return; |
439 | } | 429 | } |
440 | 430 | ||
441 | no_context: | 431 | no_context: |
442 | do_no_context(regs, int_code, trans_exc_code); | 432 | do_no_context(regs, pgm_int_code, trans_exc_code); |
443 | } | 433 | } |
444 | #endif | 434 | #endif |
445 | 435 | ||
446 | int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) | 436 | int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) |
447 | { | 437 | { |
448 | struct pt_regs regs; | 438 | struct pt_regs regs; |
449 | int access, fault; | 439 | int access, fault; |
@@ -454,14 +444,13 @@ int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) | |||
454 | regs.psw.addr = (unsigned long) __builtin_return_address(0); | 444 | regs.psw.addr = (unsigned long) __builtin_return_address(0); |
455 | regs.psw.addr |= PSW_ADDR_AMODE; | 445 | regs.psw.addr |= PSW_ADDR_AMODE; |
456 | uaddr &= PAGE_MASK; | 446 | uaddr &= PAGE_MASK; |
457 | access = write_user ? VM_WRITE : VM_READ; | 447 | access = write ? VM_WRITE : VM_READ; |
458 | fault = do_exception(®s, access, uaddr | 2); | 448 | fault = do_exception(®s, access, uaddr | 2); |
459 | if (unlikely(fault)) { | 449 | if (unlikely(fault)) { |
460 | if (fault & VM_FAULT_OOM) { | 450 | if (fault & VM_FAULT_OOM) |
461 | pagefault_out_of_memory(); | 451 | return -EFAULT; |
462 | fault = 0; | 452 | else if (fault & VM_FAULT_SIGBUS) |
463 | } else if (fault & VM_FAULT_SIGBUS) | 453 | do_sigbus(®s, pgm_int_code, uaddr); |
464 | do_sigbus(®s, int_code, uaddr); | ||
465 | } | 454 | } |
466 | return fault ? -EFAULT : 0; | 455 | return fault ? -EFAULT : 0; |
467 | } | 456 | } |
@@ -470,8 +459,7 @@ int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) | |||
470 | /* | 459 | /* |
471 | * 'pfault' pseudo page faults routines. | 460 | * 'pfault' pseudo page faults routines. |
472 | */ | 461 | */ |
473 | static ext_int_info_t ext_int_pfault; | 462 | static int pfault_disable; |
474 | static int pfault_disable = 0; | ||
475 | 463 | ||
476 | static int __init nopfault(char *str) | 464 | static int __init nopfault(char *str) |
477 | { | 465 | { |
@@ -481,22 +469,28 @@ static int __init nopfault(char *str) | |||
481 | 469 | ||
482 | __setup("nopfault", nopfault); | 470 | __setup("nopfault", nopfault); |
483 | 471 | ||
484 | typedef struct { | 472 | struct pfault_refbk { |
485 | __u16 refdiagc; | 473 | u16 refdiagc; |
486 | __u16 reffcode; | 474 | u16 reffcode; |
487 | __u16 refdwlen; | 475 | u16 refdwlen; |
488 | __u16 refversn; | 476 | u16 refversn; |
489 | __u64 refgaddr; | 477 | u64 refgaddr; |
490 | __u64 refselmk; | 478 | u64 refselmk; |
491 | __u64 refcmpmk; | 479 | u64 refcmpmk; |
492 | __u64 reserved; | 480 | u64 reserved; |
493 | } __attribute__ ((packed, aligned(8))) pfault_refbk_t; | 481 | } __attribute__ ((packed, aligned(8))); |
494 | 482 | ||
495 | int pfault_init(void) | 483 | int pfault_init(void) |
496 | { | 484 | { |
497 | pfault_refbk_t refbk = | 485 | struct pfault_refbk refbk = { |
498 | { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, | 486 | .refdiagc = 0x258, |
499 | __PF_RES_FIELD }; | 487 | .reffcode = 0, |
488 | .refdwlen = 5, | ||
489 | .refversn = 2, | ||
490 | .refgaddr = __LC_CURRENT_PID, | ||
491 | .refselmk = 1ULL << 48, | ||
492 | .refcmpmk = 1ULL << 48, | ||
493 | .reserved = __PF_RES_FIELD }; | ||
500 | int rc; | 494 | int rc; |
501 | 495 | ||
502 | if (!MACHINE_IS_VM || pfault_disable) | 496 | if (!MACHINE_IS_VM || pfault_disable) |
@@ -508,18 +502,20 @@ int pfault_init(void) | |||
508 | "2:\n" | 502 | "2:\n" |
509 | EX_TABLE(0b,1b) | 503 | EX_TABLE(0b,1b) |
510 | : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); | 504 | : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); |
511 | __ctl_set_bit(0, 9); | ||
512 | return rc; | 505 | return rc; |
513 | } | 506 | } |
514 | 507 | ||
515 | void pfault_fini(void) | 508 | void pfault_fini(void) |
516 | { | 509 | { |
517 | pfault_refbk_t refbk = | 510 | struct pfault_refbk refbk = { |
518 | { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; | 511 | .refdiagc = 0x258, |
512 | .reffcode = 1, | ||
513 | .refdwlen = 5, | ||
514 | .refversn = 2, | ||
515 | }; | ||
519 | 516 | ||
520 | if (!MACHINE_IS_VM || pfault_disable) | 517 | if (!MACHINE_IS_VM || pfault_disable) |
521 | return; | 518 | return; |
522 | __ctl_clear_bit(0,9); | ||
523 | asm volatile( | 519 | asm volatile( |
524 | " diag %0,0,0x258\n" | 520 | " diag %0,0,0x258\n" |
525 | "0:\n" | 521 | "0:\n" |
@@ -527,10 +523,15 @@ void pfault_fini(void) | |||
527 | : : "a" (&refbk), "m" (refbk) : "cc"); | 523 | : : "a" (&refbk), "m" (refbk) : "cc"); |
528 | } | 524 | } |
529 | 525 | ||
530 | static void pfault_interrupt(__u16 int_code) | 526 | static DEFINE_SPINLOCK(pfault_lock); |
527 | static LIST_HEAD(pfault_list); | ||
528 | |||
529 | static void pfault_interrupt(unsigned int ext_int_code, | ||
530 | unsigned int param32, unsigned long param64) | ||
531 | { | 531 | { |
532 | struct task_struct *tsk; | 532 | struct task_struct *tsk; |
533 | __u16 subcode; | 533 | __u16 subcode; |
534 | pid_t pid; | ||
534 | 535 | ||
535 | /* | 536 | /* |
536 | * Get the external interruption subcode & pfault | 537 | * Get the external interruption subcode & pfault |
@@ -538,63 +539,107 @@ static void pfault_interrupt(__u16 int_code) | |||
538 | * in the 'cpu address' field associated with the | 539 | * in the 'cpu address' field associated with the |
539 | * external interrupt. | 540 | * external interrupt. |
540 | */ | 541 | */ |
541 | subcode = S390_lowcore.cpu_addr; | 542 | subcode = ext_int_code >> 16; |
542 | if ((subcode & 0xff00) != __SUBCODE_MASK) | 543 | if ((subcode & 0xff00) != __SUBCODE_MASK) |
543 | return; | 544 | return; |
544 | 545 | kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; | |
545 | /* | 546 | if (subcode & 0x0080) { |
546 | * Get the token (= address of the task structure of the affected task). | 547 | /* Get the token (= pid of the affected task). */ |
547 | */ | 548 | pid = sizeof(void *) == 4 ? param32 : param64; |
548 | tsk = *(struct task_struct **) __LC_PFAULT_INTPARM; | 549 | rcu_read_lock(); |
549 | 550 | tsk = find_task_by_pid_ns(pid, &init_pid_ns); | |
551 | if (tsk) | ||
552 | get_task_struct(tsk); | ||
553 | rcu_read_unlock(); | ||
554 | if (!tsk) | ||
555 | return; | ||
556 | } else { | ||
557 | tsk = current; | ||
558 | } | ||
559 | spin_lock(&pfault_lock); | ||
550 | if (subcode & 0x0080) { | 560 | if (subcode & 0x0080) { |
551 | /* signal bit is set -> a page has been swapped in by VM */ | 561 | /* signal bit is set -> a page has been swapped in by VM */ |
552 | if (xchg(&tsk->thread.pfault_wait, -1) != 0) { | 562 | if (tsk->thread.pfault_wait == 1) { |
553 | /* Initial interrupt was faster than the completion | 563 | /* Initial interrupt was faster than the completion |
554 | * interrupt. pfault_wait is valid. Set pfault_wait | 564 | * interrupt. pfault_wait is valid. Set pfault_wait |
555 | * back to zero and wake up the process. This can | 565 | * back to zero and wake up the process. This can |
556 | * safely be done because the task is still sleeping | 566 | * safely be done because the task is still sleeping |
557 | * and can't produce new pfaults. */ | 567 | * and can't produce new pfaults. */ |
558 | tsk->thread.pfault_wait = 0; | 568 | tsk->thread.pfault_wait = 0; |
569 | list_del(&tsk->thread.list); | ||
559 | wake_up_process(tsk); | 570 | wake_up_process(tsk); |
560 | put_task_struct(tsk); | 571 | } else { |
572 | /* Completion interrupt was faster than initial | ||
573 | * interrupt. Set pfault_wait to -1 so the initial | ||
574 | * interrupt doesn't put the task to sleep. */ | ||
575 | tsk->thread.pfault_wait = -1; | ||
561 | } | 576 | } |
577 | put_task_struct(tsk); | ||
562 | } else { | 578 | } else { |
563 | /* signal bit not set -> a real page is missing. */ | 579 | /* signal bit not set -> a real page is missing. */ |
564 | get_task_struct(tsk); | 580 | if (tsk->thread.pfault_wait == -1) { |
565 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
566 | if (xchg(&tsk->thread.pfault_wait, 1) != 0) { | ||
567 | /* Completion interrupt was faster than the initial | 581 | /* Completion interrupt was faster than the initial |
568 | * interrupt (swapped in a -1 for pfault_wait). Set | 582 | * interrupt (pfault_wait == -1). Set pfault_wait |
569 | * pfault_wait back to zero and exit. This can be | 583 | * back to zero and exit. */ |
570 | * done safely because tsk is running in kernel | ||
571 | * mode and can't produce new pfaults. */ | ||
572 | tsk->thread.pfault_wait = 0; | 584 | tsk->thread.pfault_wait = 0; |
573 | set_task_state(tsk, TASK_RUNNING); | 585 | } else { |
574 | put_task_struct(tsk); | 586 | /* Initial interrupt arrived before completion |
575 | } else | 587 | * interrupt. Let the task sleep. */ |
588 | tsk->thread.pfault_wait = 1; | ||
589 | list_add(&tsk->thread.list, &pfault_list); | ||
590 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
576 | set_tsk_need_resched(tsk); | 591 | set_tsk_need_resched(tsk); |
592 | } | ||
577 | } | 593 | } |
594 | spin_unlock(&pfault_lock); | ||
578 | } | 595 | } |
579 | 596 | ||
580 | void __init pfault_irq_init(void) | 597 | static int __cpuinit pfault_cpu_notify(struct notifier_block *self, |
598 | unsigned long action, void *hcpu) | ||
581 | { | 599 | { |
582 | if (!MACHINE_IS_VM) | 600 | struct thread_struct *thread, *next; |
583 | return; | 601 | struct task_struct *tsk; |
584 | 602 | ||
585 | /* | 603 | switch (action) { |
586 | * Try to get pfault pseudo page faults going. | 604 | case CPU_DEAD: |
587 | */ | 605 | case CPU_DEAD_FROZEN: |
588 | if (register_early_external_interrupt(0x2603, pfault_interrupt, | 606 | spin_lock_irq(&pfault_lock); |
589 | &ext_int_pfault) != 0) | 607 | list_for_each_entry_safe(thread, next, &pfault_list, list) { |
590 | panic("Couldn't request external interrupt 0x2603"); | 608 | thread->pfault_wait = 0; |
609 | list_del(&thread->list); | ||
610 | tsk = container_of(thread, struct task_struct, thread); | ||
611 | wake_up_process(tsk); | ||
612 | } | ||
613 | spin_unlock_irq(&pfault_lock); | ||
614 | break; | ||
615 | default: | ||
616 | break; | ||
617 | } | ||
618 | return NOTIFY_OK; | ||
619 | } | ||
591 | 620 | ||
592 | if (pfault_init() == 0) | 621 | static int __init pfault_irq_init(void) |
593 | return; | 622 | { |
623 | int rc; | ||
594 | 624 | ||
595 | /* Tough luck, no pfault. */ | 625 | if (!MACHINE_IS_VM) |
626 | return 0; | ||
627 | rc = register_external_interrupt(0x2603, pfault_interrupt); | ||
628 | if (rc) | ||
629 | goto out_extint; | ||
630 | rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; | ||
631 | if (rc) | ||
632 | goto out_pfault; | ||
633 | service_subclass_irq_register(); | ||
634 | hotcpu_notifier(pfault_cpu_notify, 0); | ||
635 | return 0; | ||
636 | |||
637 | out_pfault: | ||
638 | unregister_external_interrupt(0x2603, pfault_interrupt); | ||
639 | out_extint: | ||
596 | pfault_disable = 1; | 640 | pfault_disable = 1; |
597 | unregister_early_external_interrupt(0x2603, pfault_interrupt, | 641 | return rc; |
598 | &ext_int_pfault); | ||
599 | } | 642 | } |
600 | #endif | 643 | early_initcall(pfault_irq_init); |
644 | |||
645 | #endif /* CONFIG_PFAULT */ | ||
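
Note on the fault.c rework above: do_exception() now uses the generic mmap_sem retry protocol. The first handle_mm_fault() call is made with FAULT_FLAG_ALLOW_RETRY (plus FAULT_FLAG_WRITE for store faults), and if it returns VM_FAULT_RETRY the flag is cleared and the fault is repeated exactly once; major/minor fault accounting is only done while FAULT_FLAG_ALLOW_RETRY is still set, i.e. on the first attempt. The function below is a minimal sketch of that control flow only, not the literal s390 code; the trans_exc_code decoding, store_indication test, and error paths of the real handler are omitted.

/* Sketch of the single-retry fault protocol used by do_exception(). */
static int fault_with_retry(struct mm_struct *mm, unsigned long address, int is_write)
{
	struct vm_area_struct *vma;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
	int fault;

	if (is_write)
		flags |= FAULT_FLAG_WRITE;
retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	/* ... vma and access validation elided ... */
	fault = handle_mm_fault(mm, vma, address, flags);
	if ((flags & FAULT_FLAG_ALLOW_RETRY) && (fault & VM_FAULT_RETRY)) {
		/* handle_mm_fault() has already dropped mmap_sem in this case */
		flags &= ~FAULT_FLAG_ALLOW_RETRY;	/* fault at most once more */
		goto retry;
	}
	up_read(&mm->mmap_sem);
	return fault;
}
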
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
new file mode 100644
index 000000000000..45b405ca2567
--- /dev/null
+++ b/arch/s390/mm/gup.c
@@ -0,0 +1,224 @@ | |||
1 | /* | ||
2 | * Lockless get_user_pages_fast for s390 | ||
3 | * | ||
4 | * Copyright IBM Corp. 2010 | ||
5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
6 | */ | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/hugetlb.h> | ||
10 | #include <linux/vmstat.h> | ||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/rwsem.h> | ||
13 | #include <asm/pgtable.h> | ||
14 | |||
15 | /* | ||
16 | * The performance critical leaf functions are made noinline otherwise gcc | ||
17 | * inlines everything into a single function which results in too much | ||
18 | * register pressure. | ||
19 | */ | ||
20 | static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, | ||
21 | unsigned long end, int write, struct page **pages, int *nr) | ||
22 | { | ||
23 | unsigned long mask; | ||
24 | pte_t *ptep, pte; | ||
25 | struct page *page; | ||
26 | |||
27 | mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; | ||
28 | |||
29 | ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); | ||
30 | do { | ||
31 | pte = *ptep; | ||
32 | barrier(); | ||
33 | if ((pte_val(pte) & mask) != 0) | ||
34 | return 0; | ||
35 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
36 | page = pte_page(pte); | ||
37 | if (!page_cache_get_speculative(page)) | ||
38 | return 0; | ||
39 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
40 | put_page(page); | ||
41 | return 0; | ||
42 | } | ||
43 | pages[*nr] = page; | ||
44 | (*nr)++; | ||
45 | |||
46 | } while (ptep++, addr += PAGE_SIZE, addr != end); | ||
47 | |||
48 | return 1; | ||
49 | } | ||
50 | |||
51 | static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, | ||
52 | unsigned long end, int write, struct page **pages, int *nr) | ||
53 | { | ||
54 | unsigned long mask, result; | ||
55 | struct page *head, *page; | ||
56 | int refs; | ||
57 | |||
58 | result = write ? 0 : _SEGMENT_ENTRY_RO; | ||
59 | mask = result | _SEGMENT_ENTRY_INV; | ||
60 | if ((pmd_val(pmd) & mask) != result) | ||
61 | return 0; | ||
62 | VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); | ||
63 | |||
64 | refs = 0; | ||
65 | head = pmd_page(pmd); | ||
66 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | ||
67 | do { | ||
68 | VM_BUG_ON(compound_head(page) != head); | ||
69 | pages[*nr] = page; | ||
70 | (*nr)++; | ||
71 | page++; | ||
72 | refs++; | ||
73 | } while (addr += PAGE_SIZE, addr != end); | ||
74 | |||
75 | if (!page_cache_add_speculative(head, refs)) { | ||
76 | *nr -= refs; | ||
77 | return 0; | ||
78 | } | ||
79 | |||
80 | if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { | ||
81 | *nr -= refs; | ||
82 | while (refs--) | ||
83 | put_page(head); | ||
84 | } | ||
85 | |||
86 | return 1; | ||
87 | } | ||
88 | |||
89 | |||
90 | static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, | ||
91 | unsigned long end, int write, struct page **pages, int *nr) | ||
92 | { | ||
93 | unsigned long next; | ||
94 | pmd_t *pmdp, pmd; | ||
95 | |||
96 | pmdp = (pmd_t *) pudp; | ||
97 | #ifdef CONFIG_64BIT | ||
98 | if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | ||
99 | pmdp = (pmd_t *) pud_deref(pud); | ||
100 | pmdp += pmd_index(addr); | ||
101 | #endif | ||
102 | do { | ||
103 | pmd = *pmdp; | ||
104 | barrier(); | ||
105 | next = pmd_addr_end(addr, end); | ||
106 | if (pmd_none(pmd)) | ||
107 | return 0; | ||
108 | if (unlikely(pmd_huge(pmd))) { | ||
109 | if (!gup_huge_pmd(pmdp, pmd, addr, next, | ||
110 | write, pages, nr)) | ||
111 | return 0; | ||
112 | } else if (!gup_pte_range(pmdp, pmd, addr, next, | ||
113 | write, pages, nr)) | ||
114 | return 0; | ||
115 | } while (pmdp++, addr = next, addr != end); | ||
116 | |||
117 | return 1; | ||
118 | } | ||
119 | |||
120 | static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, | ||
121 | unsigned long end, int write, struct page **pages, int *nr) | ||
122 | { | ||
123 | unsigned long next; | ||
124 | pud_t *pudp, pud; | ||
125 | |||
126 | pudp = (pud_t *) pgdp; | ||
127 | #ifdef CONFIG_64BIT | ||
128 | if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||
129 | pudp = (pud_t *) pgd_deref(pgd); | ||
130 | pudp += pud_index(addr); | ||
131 | #endif | ||
132 | do { | ||
133 | pud = *pudp; | ||
134 | barrier(); | ||
135 | next = pud_addr_end(addr, end); | ||
136 | if (pud_none(pud)) | ||
137 | return 0; | ||
138 | if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) | ||
139 | return 0; | ||
140 | } while (pudp++, addr = next, addr != end); | ||
141 | |||
142 | return 1; | ||
143 | } | ||
144 | |||
145 | /** | ||
146 | * get_user_pages_fast() - pin user pages in memory | ||
147 | * @start: starting user address | ||
148 | * @nr_pages: number of pages from start to pin | ||
149 | * @write: whether pages will be written to | ||
150 | * @pages: array that receives pointers to the pages pinned. | ||
151 | * Should be at least nr_pages long. | ||
152 | * | ||
153 | * Attempt to pin user pages in memory without taking mm->mmap_sem. | ||
154 | * If not successful, it will fall back to taking the lock and | ||
155 | * calling get_user_pages(). | ||
156 | * | ||
157 | * Returns number of pages pinned. This may be fewer than the number | ||
158 | * requested. If nr_pages is 0 or negative, returns 0. If no pages | ||
159 | * were pinned, returns -errno. | ||
160 | */ | ||
161 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
162 | struct page **pages) | ||
163 | { | ||
164 | struct mm_struct *mm = current->mm; | ||
165 | unsigned long addr, len, end; | ||
166 | unsigned long next; | ||
167 | pgd_t *pgdp, pgd; | ||
168 | int nr = 0; | ||
169 | |||
170 | start &= PAGE_MASK; | ||
171 | addr = start; | ||
172 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
173 | end = start + len; | ||
174 | if (end < start) | ||
175 | goto slow_irqon; | ||
176 | |||
177 | /* | ||
178 | * local_irq_disable() doesn't prevent pagetable teardown, but does | ||
179 | * prevent the pagetables from being freed on s390. | ||
180 | * | ||
181 | * So long as we atomically load page table pointers versus teardown, | ||
182 | * we can follow the address down to the the page and take a ref on it. | ||
183 | */ | ||
184 | local_irq_disable(); | ||
185 | pgdp = pgd_offset(mm, addr); | ||
186 | do { | ||
187 | pgd = *pgdp; | ||
188 | barrier(); | ||
189 | next = pgd_addr_end(addr, end); | ||
190 | if (pgd_none(pgd)) | ||
191 | goto slow; | ||
192 | if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) | ||
193 | goto slow; | ||
194 | } while (pgdp++, addr = next, addr != end); | ||
195 | local_irq_enable(); | ||
196 | |||
197 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | ||
198 | return nr; | ||
199 | |||
200 | { | ||
201 | int ret; | ||
202 | slow: | ||
203 | local_irq_enable(); | ||
204 | slow_irqon: | ||
205 | /* Try to get the remaining pages with get_user_pages */ | ||
206 | start += nr << PAGE_SHIFT; | ||
207 | pages += nr; | ||
208 | |||
209 | down_read(&mm->mmap_sem); | ||
210 | ret = get_user_pages(current, mm, start, | ||
211 | (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); | ||
212 | up_read(&mm->mmap_sem); | ||
213 | |||
214 | /* Have to be a bit careful with return values */ | ||
215 | if (nr > 0) { | ||
216 | if (ret < 0) | ||
217 | ret = nr; | ||
218 | else | ||
219 | ret += nr; | ||
220 | } | ||
221 | |||
222 | return ret; | ||
223 | } | ||
224 | } | ||
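
The get_user_pages_fast() kernel-doc above describes the return convention: the call may pin fewer pages than requested, or return -errno if no pages were pinned at all. The caller below is purely an illustration of that convention; the function name and the all-or-nothing policy are invented for the example.

#include <linux/mm.h>

/* Example only: pin a user buffer for writing, insisting on all pages. */
static int pin_user_buffer(unsigned long uaddr, size_t len, struct page **pages)
{
	int nr_pages = (offset_in_page(uaddr) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int pinned, i;

	pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (pinned < 0)
		return pinned;			/* nothing pinned: plain error code */
	if (pinned < nr_pages) {
		for (i = 0; i < pinned; i++)	/* partial pin: release and fail */
			put_page(pages[i]);
		return -EFAULT;
	}
	return 0;
}
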
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f28c43d2f61d..a4d856db9154 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -13,7 +13,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | |||
13 | pte_t *pteptr, pte_t pteval) | 13 | pte_t *pteptr, pte_t pteval) |
14 | { | 14 | { |
15 | pmd_t *pmdp = (pmd_t *) pteptr; | 15 | pmd_t *pmdp = (pmd_t *) pteptr; |
16 | pte_t shadow_pteval = pteval; | ||
17 | unsigned long mask; | 16 | unsigned long mask; |
18 | 17 | ||
19 | if (!MACHINE_HAS_HPAGE) { | 18 | if (!MACHINE_HAS_HPAGE) { |
@@ -21,18 +20,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | |||
21 | mask = pte_val(pteval) & | 20 | mask = pte_val(pteval) & |
22 | (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); | 21 | (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); |
23 | pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; | 22 | pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; |
24 | if (mm->context.noexec) { | ||
25 | pteptr += PTRS_PER_PTE; | ||
26 | pte_val(shadow_pteval) = | ||
27 | (_SEGMENT_ENTRY + __pa(pteptr)) | mask; | ||
28 | } | ||
29 | } | 23 | } |
30 | 24 | ||
31 | pmd_val(*pmdp) = pte_val(pteval); | 25 | pmd_val(*pmdp) = pte_val(pteval); |
32 | if (mm->context.noexec) { | ||
33 | pmdp = get_shadow_table(pmdp); | ||
34 | pmd_val(*pmdp) = pte_val(shadow_pteval); | ||
35 | } | ||
36 | } | 26 | } |
37 | 27 | ||
38 | int arch_prepare_hugepage(struct page *page) | 28 | int arch_prepare_hugepage(struct page *page) |
@@ -68,7 +58,7 @@ void arch_release_hugepage(struct page *page) | |||
68 | ptep = (pte_t *) page[1].index; | 58 | ptep = (pte_t *) page[1].index; |
69 | if (!ptep) | 59 | if (!ptep) |
70 | return; | 60 | return; |
71 | pte_free(&init_mm, ptep); | 61 | page_table_free(&init_mm, (unsigned long *) ptep); |
72 | page[1].index = 0; | 62 | page[1].index = 0; |
73 | } | 63 | } |
74 | 64 | ||
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 30eb6d02ddb8..59b663109d90 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -38,19 +38,59 @@ | |||
38 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
39 | #include <asm/sections.h> | 39 | #include <asm/sections.h> |
40 | 40 | ||
41 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
42 | |||
43 | pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); | 41 | pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); |
44 | 42 | ||
45 | char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); | 43 | unsigned long empty_zero_page, zero_page_mask; |
46 | EXPORT_SYMBOL(empty_zero_page); | 44 | EXPORT_SYMBOL(empty_zero_page); |
47 | 45 | ||
46 | static unsigned long setup_zero_pages(void) | ||
47 | { | ||
48 | struct cpuid cpu_id; | ||
49 | unsigned int order; | ||
50 | unsigned long size; | ||
51 | struct page *page; | ||
52 | int i; | ||
53 | |||
54 | get_cpu_id(&cpu_id); | ||
55 | switch (cpu_id.machine) { | ||
56 | case 0x9672: /* g5 */ | ||
57 | case 0x2064: /* z900 */ | ||
58 | case 0x2066: /* z900 */ | ||
59 | case 0x2084: /* z990 */ | ||
60 | case 0x2086: /* z990 */ | ||
61 | case 0x2094: /* z9-109 */ | ||
62 | case 0x2096: /* z9-109 */ | ||
63 | order = 0; | ||
64 | break; | ||
65 | case 0x2097: /* z10 */ | ||
66 | case 0x2098: /* z10 */ | ||
67 | default: | ||
68 | order = 2; | ||
69 | break; | ||
70 | } | ||
71 | |||
72 | empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); | ||
73 | if (!empty_zero_page) | ||
74 | panic("Out of memory in setup_zero_pages"); | ||
75 | |||
76 | page = virt_to_page((void *) empty_zero_page); | ||
77 | split_page(page, order); | ||
78 | for (i = 1 << order; i > 0; i--) { | ||
79 | SetPageReserved(page); | ||
80 | page++; | ||
81 | } | ||
82 | |||
83 | size = PAGE_SIZE << order; | ||
84 | zero_page_mask = (size - 1) & PAGE_MASK; | ||
85 | |||
86 | return 1UL << order; | ||
87 | } | ||
88 | |||
48 | /* | 89 | /* |
49 | * paging_init() sets up the page tables | 90 | * paging_init() sets up the page tables |
50 | */ | 91 | */ |
51 | void __init paging_init(void) | 92 | void __init paging_init(void) |
52 | { | 93 | { |
53 | static const int ssm_mask = 0x04000000L; | ||
54 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 94 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
55 | unsigned long pgd_type; | 95 | unsigned long pgd_type; |
56 | 96 | ||
@@ -72,18 +112,17 @@ void __init paging_init(void) | |||
72 | __ctl_load(S390_lowcore.kernel_asce, 1, 1); | 112 | __ctl_load(S390_lowcore.kernel_asce, 1, 1); |
73 | __ctl_load(S390_lowcore.kernel_asce, 7, 7); | 113 | __ctl_load(S390_lowcore.kernel_asce, 7, 7); |
74 | __ctl_load(S390_lowcore.kernel_asce, 13, 13); | 114 | __ctl_load(S390_lowcore.kernel_asce, 13, 13); |
75 | __raw_local_irq_ssm(ssm_mask); | 115 | arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); |
76 | 116 | ||
77 | atomic_set(&init_mm.context.attach_count, 1); | 117 | atomic_set(&init_mm.context.attach_count, 1); |
78 | 118 | ||
79 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | 119 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
80 | sparse_init(); | 120 | sparse_init(); |
81 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 121 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
82 | #ifdef CONFIG_ZONE_DMA | ||
83 | max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); | 122 | max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); |
84 | #endif | ||
85 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 123 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
86 | free_area_init_nodes(max_zone_pfns); | 124 | free_area_init_nodes(max_zone_pfns); |
125 | fault_init(); | ||
87 | } | 126 | } |
88 | 127 | ||
89 | void __init mem_init(void) | 128 | void __init mem_init(void) |
@@ -93,14 +132,12 @@ void __init mem_init(void) | |||
93 | max_mapnr = num_physpages = max_low_pfn; | 132 | max_mapnr = num_physpages = max_low_pfn; |
94 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | 133 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); |
95 | 134 | ||
96 | /* clear the zero-page */ | ||
97 | memset(empty_zero_page, 0, PAGE_SIZE); | ||
98 | |||
99 | /* Setup guest page hinting */ | 135 | /* Setup guest page hinting */ |
100 | cmma_init(); | 136 | cmma_init(); |
101 | 137 | ||
102 | /* this will put all low memory onto the freelists */ | 138 | /* this will put all low memory onto the freelists */ |
103 | totalram_pages += free_all_bootmem(); | 139 | totalram_pages += free_all_bootmem(); |
140 | totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ | ||
104 | 141 | ||
105 | reservedpages = 0; | 142 | reservedpages = 0; |
106 | 143 | ||
@@ -136,7 +173,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
136 | pmd = pmd_offset(pud, address); | 173 | pmd = pmd_offset(pud, address); |
137 | pte = pte_offset_kernel(pmd, address); | 174 | pte = pte_offset_kernel(pmd, address); |
138 | if (!enable) { | 175 | if (!enable) { |
139 | ptep_invalidate(&init_mm, address, pte); | 176 | __ptep_ipte(address, pte); |
177 | pte_val(*pte) = _PAGE_TYPE_EMPTY; | ||
140 | continue; | 178 | continue; |
141 | } | 179 | } |
142 | *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); | 180 | *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); |
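
Note on the init.c hunks above: setup_zero_pages() replaces the single static empty_zero_page with a block of one or four zeroed pages (chosen by machine type) so that read faults on the zero page are spread over several cache colors, and zero_page_mask records the size of that block. The consumer is the ZERO_PAGE() macro in asm/pgtable.h, which is not part of this diff; the helper below is only a sketch of how the mask is meant to be used.

/* Sketch: pick the zero page whose cache color matches the faulting address. */
static struct page *zero_page_for(unsigned long vaddr)
{
	return virt_to_page((void *)(empty_zero_page + (vaddr & zero_page_mask)));
}
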
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index a8c2af8c650f..51e5cd9b906a 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -19,7 +19,7 @@ | |||
19 | * using the stura instruction. | 19 | * using the stura instruction. |
20 | * Returns the number of bytes copied or -EFAULT. | 20 | * Returns the number of bytes copied or -EFAULT. |
21 | */ | 21 | */ |
22 | static long probe_kernel_write_odd(void *dst, void *src, size_t size) | 22 | static long probe_kernel_write_odd(void *dst, const void *src, size_t size) |
23 | { | 23 | { |
24 | unsigned long count, aligned; | 24 | unsigned long count, aligned; |
25 | int offset, mask; | 25 | int offset, mask; |
@@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size) | |||
45 | return rc ? rc : count; | 45 | return rc ? rc : count; |
46 | } | 46 | } |
47 | 47 | ||
48 | long probe_kernel_write(void *dst, void *src, size_t size) | 48 | long probe_kernel_write(void *dst, const void *src, size_t size) |
49 | { | 49 | { |
50 | long copied = 0; | 50 | long copied = 0; |
51 | 51 | ||
@@ -71,7 +71,7 @@ int memcpy_real(void *dest, void *src, size_t count) | |||
71 | 71 | ||
72 | if (!count) | 72 | if (!count) |
73 | return 0; | 73 | return 0; |
74 | flags = __raw_local_irq_stnsm(0xf8UL); | 74 | flags = __arch_local_irq_stnsm(0xf8UL); |
75 | asm volatile ( | 75 | asm volatile ( |
76 | "0: mvcle %1,%2,0x0\n" | 76 | "0: mvcle %1,%2,0x0\n" |
77 | "1: jo 0b\n" | 77 | "1: jo 0b\n" |
@@ -82,6 +82,6 @@ int memcpy_real(void *dest, void *src, size_t count) | |||
82 | "+d" (_len2), "=m" (*((long *) dest)) | 82 | "+d" (_len2), "=m" (*((long *) dest)) |
83 | : "m" (*((long *) src)) | 83 | : "m" (*((long *) src)) |
84 | : "cc", "memory"); | 84 | : "cc", "memory"); |
85 | __raw_local_irq_ssm(flags); | 85 | arch_local_irq_restore(flags); |
86 | return rc; | 86 | return rc; |
87 | } | 87 | } |
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 869efbaed3ea..c9a9f7f18188 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -27,17 +27,44 @@ | |||
27 | #include <linux/personality.h> | 27 | #include <linux/personality.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/random.h> | ||
30 | #include <asm/pgalloc.h> | 31 | #include <asm/pgalloc.h> |
31 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
32 | 33 | ||
34 | static unsigned long stack_maxrandom_size(void) | ||
35 | { | ||
36 | if (!(current->flags & PF_RANDOMIZE)) | ||
37 | return 0; | ||
38 | if (current->personality & ADDR_NO_RANDOMIZE) | ||
39 | return 0; | ||
40 | return STACK_RND_MASK << PAGE_SHIFT; | ||
41 | } | ||
42 | |||
33 | /* | 43 | /* |
34 | * Top of mmap area (just below the process stack). | 44 | * Top of mmap area (just below the process stack). |
35 | * | 45 | * |
36 | * Leave an at least ~128 MB hole. | 46 | * Leave at least a ~32 MB hole. |
37 | */ | 47 | */ |
38 | #define MIN_GAP (128*1024*1024) | 48 | #define MIN_GAP (32*1024*1024) |
39 | #define MAX_GAP (STACK_TOP/6*5) | 49 | #define MAX_GAP (STACK_TOP/6*5) |
40 | 50 | ||
51 | static inline int mmap_is_legacy(void) | ||
52 | { | ||
53 | if (current->personality & ADDR_COMPAT_LAYOUT) | ||
54 | return 1; | ||
55 | if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) | ||
56 | return 1; | ||
57 | return sysctl_legacy_va_layout; | ||
58 | } | ||
59 | |||
60 | static unsigned long mmap_rnd(void) | ||
61 | { | ||
62 | if (!(current->flags & PF_RANDOMIZE)) | ||
63 | return 0; | ||
64 | /* 8MB randomization for mmap_base */ | ||
65 | return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; | ||
66 | } | ||
67 | |||
41 | static inline unsigned long mmap_base(void) | 68 | static inline unsigned long mmap_base(void) |
42 | { | 69 | { |
43 | unsigned long gap = rlimit(RLIMIT_STACK); | 70 | unsigned long gap = rlimit(RLIMIT_STACK); |
@@ -46,22 +73,8 @@ static inline unsigned long mmap_base(void) | |||
46 | gap = MIN_GAP; | 73 | gap = MIN_GAP; |
47 | else if (gap > MAX_GAP) | 74 | else if (gap > MAX_GAP) |
48 | gap = MAX_GAP; | 75 | gap = MAX_GAP; |
49 | 76 | gap &= PAGE_MASK; | |
50 | return STACK_TOP - (gap & PAGE_MASK); | 77 | return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; |
51 | } | ||
52 | |||
53 | static inline int mmap_is_legacy(void) | ||
54 | { | ||
55 | #ifdef CONFIG_64BIT | ||
56 | /* | ||
57 | * Force standard allocation for 64 bit programs. | ||
58 | */ | ||
59 | if (!is_compat_task()) | ||
60 | return 1; | ||
61 | #endif | ||
62 | return sysctl_legacy_va_layout || | ||
63 | (current->personality & ADDR_COMPAT_LAYOUT) || | ||
64 | rlimit(RLIMIT_STACK) == RLIM_INFINITY; | ||
65 | } | 78 | } |
66 | 79 | ||
67 | #ifndef CONFIG_64BIT | 80 | #ifndef CONFIG_64BIT |
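
Note on the mmap.c hunks above: the new mmap_base() places the top-down mmap area below STACK_TOP minus the stack rlimit gap (clamped between the 32 MB MIN_GAP and MAX_GAP), minus the maximum stack randomization, minus an up-to-8 MB random offset for the mmap base itself. The stand-alone mock-up below just reproduces that arithmetic so it can be tried in userspace; the STACK_TOP and STACK_RND_MASK values are assumptions for the example, not the real s390 definitions.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
#define STACK_TOP	(1UL << 42)		/* assumed for the example */
#define STACK_RND_MASK	0x3ffUL			/* assumed for the example */
#define MIN_GAP		(32UL * 1024 * 1024)
#define MAX_GAP		(STACK_TOP / 6 * 5)

static unsigned long mmap_base(unsigned long stack_rlimit, unsigned long rnd)
{
	unsigned long gap = stack_rlimit;

	if (gap < MIN_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;
	gap &= PAGE_MASK;
	return STACK_TOP - (STACK_RND_MASK << PAGE_SHIFT) - rnd - gap;
}

int main(void)
{
	/* an 8 MB stack rlimit is clamped up to the 32 MB MIN_GAP */
	printf("mmap base: %#lx\n", mmap_base(8UL << 20, 0));
	return 0;
}
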
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
new file mode 100644
index 000000000000..d013ed39743b
--- /dev/null
+++ b/arch/s390/mm/pageattr.c
@@ -0,0 +1,61 @@ | |||
1 | /* | ||
2 | * Copyright IBM Corp. 2011 | ||
3 | * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> | ||
4 | */ | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/mm.h> | ||
7 | #include <linux/hugetlb.h> | ||
8 | #include <asm/pgtable.h> | ||
9 | |||
10 | static void change_page_attr(unsigned long addr, int numpages, | ||
11 | pte_t (*set) (pte_t)) | ||
12 | { | ||
13 | pte_t *ptep, pte; | ||
14 | pmd_t *pmdp; | ||
15 | pud_t *pudp; | ||
16 | pgd_t *pgdp; | ||
17 | int i; | ||
18 | |||
19 | for (i = 0; i < numpages; i++) { | ||
20 | pgdp = pgd_offset(&init_mm, addr); | ||
21 | pudp = pud_offset(pgdp, addr); | ||
22 | pmdp = pmd_offset(pudp, addr); | ||
23 | if (pmd_huge(*pmdp)) { | ||
24 | WARN_ON_ONCE(1); | ||
25 | continue; | ||
26 | } | ||
27 | ptep = pte_offset_kernel(pmdp, addr); | ||
28 | |||
29 | pte = *ptep; | ||
30 | pte = set(pte); | ||
31 | __ptep_ipte(addr, ptep); | ||
32 | *ptep = pte; | ||
33 | addr += PAGE_SIZE; | ||
34 | } | ||
35 | } | ||
36 | |||
37 | int set_memory_ro(unsigned long addr, int numpages) | ||
38 | { | ||
39 | change_page_attr(addr, numpages, pte_wrprotect); | ||
40 | return 0; | ||
41 | } | ||
42 | EXPORT_SYMBOL_GPL(set_memory_ro); | ||
43 | |||
44 | int set_memory_rw(unsigned long addr, int numpages) | ||
45 | { | ||
46 | change_page_attr(addr, numpages, pte_mkwrite); | ||
47 | return 0; | ||
48 | } | ||
49 | EXPORT_SYMBOL_GPL(set_memory_rw); | ||
50 | |||
51 | /* not possible */ | ||
52 | int set_memory_nx(unsigned long addr, int numpages) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | EXPORT_SYMBOL_GPL(set_memory_nx); | ||
57 | |||
58 | int set_memory_x(unsigned long addr, int numpages) | ||
59 | { | ||
60 | return 0; | ||
61 | } | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d999249d357..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/quicklist.h> | 17 | #include <linux/quicklist.h> |
18 | #include <linux/rcupdate.h> | ||
18 | 19 | ||
19 | #include <asm/system.h> | 20 | #include <asm/system.h> |
20 | #include <asm/pgtable.h> | 21 | #include <asm/pgtable.h> |
@@ -25,30 +26,10 @@ | |||
25 | 26 | ||
26 | #ifndef CONFIG_64BIT | 27 | #ifndef CONFIG_64BIT |
27 | #define ALLOC_ORDER 1 | 28 | #define ALLOC_ORDER 1 |
28 | #define TABLES_PER_PAGE 4 | 29 | #define FRAG_MASK 0x0f |
29 | #define FRAG_MASK 15UL | ||
30 | #define SECOND_HALVES 10UL | ||
31 | |||
32 | void clear_table_pgstes(unsigned long *table) | ||
33 | { | ||
34 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
35 | memset(table + 256, 0, PAGE_SIZE/4); | ||
36 | clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
37 | memset(table + 768, 0, PAGE_SIZE/4); | ||
38 | } | ||
39 | |||
40 | #else | 30 | #else |
41 | #define ALLOC_ORDER 2 | 31 | #define ALLOC_ORDER 2 |
42 | #define TABLES_PER_PAGE 2 | 32 | #define FRAG_MASK 0x03 |
43 | #define FRAG_MASK 3UL | ||
44 | #define SECOND_HALVES 2UL | ||
45 | |||
46 | void clear_table_pgstes(unsigned long *table) | ||
47 | { | ||
48 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | ||
49 | memset(table + 256, 0, PAGE_SIZE/2); | ||
50 | } | ||
51 | |||
52 | #endif | 33 | #endif |
53 | 34 | ||
54 | unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; | 35 | unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; |
@@ -63,37 +44,17 @@ static int __init parse_vmalloc(char *arg) | |||
63 | } | 44 | } |
64 | early_param("vmalloc", parse_vmalloc); | 45 | early_param("vmalloc", parse_vmalloc); |
65 | 46 | ||
66 | unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) | 47 | unsigned long *crst_table_alloc(struct mm_struct *mm) |
67 | { | 48 | { |
68 | struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 49 | struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); |
69 | 50 | ||
70 | if (!page) | 51 | if (!page) |
71 | return NULL; | 52 | return NULL; |
72 | page->index = 0; | ||
73 | if (noexec) { | ||
74 | struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | ||
75 | if (!shadow) { | ||
76 | __free_pages(page, ALLOC_ORDER); | ||
77 | return NULL; | ||
78 | } | ||
79 | page->index = page_to_phys(shadow); | ||
80 | } | ||
81 | spin_lock(&mm->context.list_lock); | ||
82 | list_add(&page->lru, &mm->context.crst_list); | ||
83 | spin_unlock(&mm->context.list_lock); | ||
84 | return (unsigned long *) page_to_phys(page); | 53 | return (unsigned long *) page_to_phys(page); |
85 | } | 54 | } |
86 | 55 | ||
87 | void crst_table_free(struct mm_struct *mm, unsigned long *table) | 56 | void crst_table_free(struct mm_struct *mm, unsigned long *table) |
88 | { | 57 | { |
89 | unsigned long *shadow = get_shadow_table(table); | ||
90 | struct page *page = virt_to_page(table); | ||
91 | |||
92 | spin_lock(&mm->context.list_lock); | ||
93 | list_del(&page->lru); | ||
94 | spin_unlock(&mm->context.list_lock); | ||
95 | if (shadow) | ||
96 | free_pages((unsigned long) shadow, ALLOC_ORDER); | ||
97 | free_pages((unsigned long) table, ALLOC_ORDER); | 58 | free_pages((unsigned long) table, ALLOC_ORDER); |
98 | } | 59 | } |
99 | 60 | ||
@@ -105,10 +66,10 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) | |||
105 | 66 | ||
106 | BUG_ON(limit > (1UL << 53)); | 67 | BUG_ON(limit > (1UL << 53)); |
107 | repeat: | 68 | repeat: |
108 | table = crst_table_alloc(mm, mm->context.noexec); | 69 | table = crst_table_alloc(mm); |
109 | if (!table) | 70 | if (!table) |
110 | return -ENOMEM; | 71 | return -ENOMEM; |
111 | spin_lock(&mm->page_table_lock); | 72 | spin_lock_bh(&mm->page_table_lock); |
112 | if (mm->context.asce_limit < limit) { | 73 | if (mm->context.asce_limit < limit) { |
113 | pgd = (unsigned long *) mm->pgd; | 74 | pgd = (unsigned long *) mm->pgd; |
114 | if (mm->context.asce_limit <= (1UL << 31)) { | 75 | if (mm->context.asce_limit <= (1UL << 31)) { |
@@ -130,7 +91,7 @@ repeat: | |||
130 | mm->task_size = mm->context.asce_limit; | 91 | mm->task_size = mm->context.asce_limit; |
131 | table = NULL; | 92 | table = NULL; |
132 | } | 93 | } |
133 | spin_unlock(&mm->page_table_lock); | 94 | spin_unlock_bh(&mm->page_table_lock); |
134 | if (table) | 95 | if (table) |
135 | crst_table_free(mm, table); | 96 | crst_table_free(mm, table); |
136 | if (mm->context.asce_limit < limit) | 97 | if (mm->context.asce_limit < limit) |
@@ -172,94 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | |||
172 | } | 133 | } |
173 | #endif | 134 | #endif |
174 | 135 | ||
136 | static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) | ||
137 | { | ||
138 | unsigned int old, new; | ||
139 | |||
140 | do { | ||
141 | old = atomic_read(v); | ||
142 | new = old ^ bits; | ||
143 | } while (atomic_cmpxchg(v, old, new) != old); | ||
144 | return new; | ||
145 | } | ||
146 | |||
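The atomic_xor_bits() helper added above toggles bits in page->_mapcount without taking a lock: it rereads the counter and retries the compare-and-exchange until no other CPU has changed the value in between. The same idiom written against plain C11 atomics instead of the kernel's atomic_t, as a sketch only:

    #include <stdatomic.h>

    /* Toggle 'bits' in *v atomically and return the new value. */
    static unsigned int xor_bits(atomic_uint *v, unsigned int bits)
    {
            unsigned int old = atomic_load(v);
            unsigned int new;

            do {
                    new = old ^ bits;
                    /* on failure, 'old' is refreshed with the current value */
            } while (!atomic_compare_exchange_weak(v, &old, new));
            return new;
    }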
175 | /* | 147 | /* |
176 | * page table entry allocation/free routines. | 148 | * page table entry allocation/free routines. |
177 | */ | 149 | */ |
150 | #ifdef CONFIG_PGSTE | ||
151 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) | ||
152 | { | ||
153 | struct page *page; | ||
154 | unsigned long *table; | ||
155 | |||
156 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | ||
157 | if (!page) | ||
158 | return NULL; | ||
159 | pgtable_page_ctor(page); | ||
160 | atomic_set(&page->_mapcount, 3); | ||
161 | table = (unsigned long *) page_to_phys(page); | ||
162 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | ||
163 | clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); | ||
164 | return table; | ||
165 | } | ||
166 | |||
167 | static inline void page_table_free_pgste(unsigned long *table) | ||
168 | { | ||
169 | struct page *page; | ||
170 | |||
171 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
172 | pgtable_page_ctor(page); | ||
173 | atomic_set(&page->_mapcount, -1); | ||
174 | __free_page(page); | ||
175 | } | ||
176 | #endif | ||
177 | |||
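When the mm has PGSTEs (KVM guests), a page table always occupies a full 4K page: the lower half holds the 256 PTEs, the upper half the matching page-status/guest entries, which is why page_table_alloc_pgste() clears the two halves separately and pins _mapcount at 3 (both fragment bits set) so the page never enters the fragment pool. A layout sketch, with sizes and the offset name assumed rather than taken from the patch:

    /* PGSTE-backed page table page (64-bit, assumed sizes):
     *
     *   0x000 - 0x7ff : 256 pte_t entries, 8 bytes each  (cleared to empty)
     *   0x800 - 0xfff : 256 pgste entries, one per pte   (cleared to zero)
     */
    #define PTRS_PER_PTE 256
    #define PGSTE_OFFSET (PTRS_PER_PTE * 8)   /* 2048: start of the pgste half */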
178 | unsigned long *page_table_alloc(struct mm_struct *mm) | 178 | unsigned long *page_table_alloc(struct mm_struct *mm) |
179 | { | 179 | { |
180 | struct page *page; | 180 | struct page *page; |
181 | unsigned long *table; | 181 | unsigned long *table; |
182 | unsigned long bits; | 182 | unsigned int mask, bit; |
183 | 183 | ||
184 | bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; | 184 | #ifdef CONFIG_PGSTE |
185 | spin_lock(&mm->context.list_lock); | 185 | if (mm_has_pgste(mm)) |
186 | page = NULL; | 186 | return page_table_alloc_pgste(mm); |
187 | #endif | ||
188 | /* Allocate fragments of a 4K page as 1K/2K page table */ | ||
189 | spin_lock_bh(&mm->context.list_lock); | ||
190 | mask = FRAG_MASK; | ||
187 | if (!list_empty(&mm->context.pgtable_list)) { | 191 | if (!list_empty(&mm->context.pgtable_list)) { |
188 | page = list_first_entry(&mm->context.pgtable_list, | 192 | page = list_first_entry(&mm->context.pgtable_list, |
189 | struct page, lru); | 193 | struct page, lru); |
190 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | 194 | table = (unsigned long *) page_to_phys(page); |
191 | page = NULL; | 195 | mask = atomic_read(&page->_mapcount); |
196 | mask = mask | (mask >> 4); | ||
192 | } | 197 | } |
193 | if (!page) { | 198 | if ((mask & FRAG_MASK) == FRAG_MASK) { |
194 | spin_unlock(&mm->context.list_lock); | 199 | spin_unlock_bh(&mm->context.list_lock); |
195 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | 200 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); |
196 | if (!page) | 201 | if (!page) |
197 | return NULL; | 202 | return NULL; |
198 | pgtable_page_ctor(page); | 203 | pgtable_page_ctor(page); |
199 | page->flags &= ~FRAG_MASK; | 204 | atomic_set(&page->_mapcount, 1); |
200 | table = (unsigned long *) page_to_phys(page); | 205 | table = (unsigned long *) page_to_phys(page); |
201 | if (mm->context.has_pgste) | 206 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); |
202 | clear_table_pgstes(table); | 207 | spin_lock_bh(&mm->context.list_lock); |
203 | else | ||
204 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | ||
205 | spin_lock(&mm->context.list_lock); | ||
206 | list_add(&page->lru, &mm->context.pgtable_list); | 208 | list_add(&page->lru, &mm->context.pgtable_list); |
209 | } else { | ||
210 | for (bit = 1; mask & bit; bit <<= 1) | ||
211 | table += PTRS_PER_PTE; | ||
212 | mask = atomic_xor_bits(&page->_mapcount, bit); | ||
213 | if ((mask & FRAG_MASK) == FRAG_MASK) | ||
214 | list_del(&page->lru); | ||
207 | } | 215 | } |
208 | table = (unsigned long *) page_to_phys(page); | 216 | spin_unlock_bh(&mm->context.list_lock); |
209 | while (page->flags & bits) { | ||
210 | table += 256; | ||
211 | bits <<= 1; | ||
212 | } | ||
213 | page->flags |= bits; | ||
214 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | ||
215 | list_move_tail(&page->lru, &mm->context.pgtable_list); | ||
216 | spin_unlock(&mm->context.list_lock); | ||
217 | return table; | 217 | return table; |
218 | } | 218 | } |
219 | 219 | ||
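page_table_alloc() above treats a 4K page as a small pool of page-table fragments: the low bits of page->_mapcount record which 1K/2K fragments are in use (FRAG_MASK 0x03, i.e. two 2K fragments on 64-bit), pages with a free fragment sit on mm->context.pgtable_list, and a page is dropped from that list once every fragment is taken. The "mask | (mask >> 4)" step also folds in fragments that are still queued for deferred freeing, so they are not handed out again early. A user-space toy of the same bookkeeping, with names and sizes assumed, not kernel code:

    #include <stdio.h>

    #define FRAGS      2                    /* 64-bit case: two 2K fragments */
    #define FRAG_MASK  ((1u << FRAGS) - 1)

    /* low bits of 'mask' = fragments in use, high nibble = pending frees */
    static int frag_alloc(unsigned int *mask)
    {
            unsigned int busy = (*mask | (*mask >> 4)) & FRAG_MASK;
            unsigned int bit = 1, idx = 0;

            if (busy == FRAG_MASK)
                    return -1;              /* page is full */
            while (busy & bit) {
                    bit <<= 1;
                    idx++;
            }
            *mask ^= bit;                   /* mark fragment busy */
            return idx;                     /* fragment index within the page */
    }

    static void frag_free(unsigned int *mask, int idx)
    {
            *mask ^= 1u << idx;             /* clear the in-use bit */
    }

    int main(void)
    {
            unsigned int mask = 0;
            int a = frag_alloc(&mask), b = frag_alloc(&mask);

            printf("got fragments %d and %d, mask=%#x\n", a, b, mask);
            frag_free(&mask, a);
            printf("after free, mask=%#x\n", mask);
            return 0;
    }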
220 | void page_table_free(struct mm_struct *mm, unsigned long *table) | 220 | void page_table_free(struct mm_struct *mm, unsigned long *table) |
221 | { | 221 | { |
222 | struct page *page; | 222 | struct page *page; |
223 | unsigned long bits; | 223 | unsigned int bit, mask; |
224 | 224 | ||
225 | bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; | 225 | #ifdef CONFIG_PGSTE |
226 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); | 226 | if (mm_has_pgste(mm)) |
227 | return page_table_free_pgste(table); | ||
228 | #endif | ||
229 | /* Free 1K/2K page table fragment of a 4K page */ | ||
227 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 230 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
228 | spin_lock(&mm->context.list_lock); | 231 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); |
229 | page->flags ^= bits; | 232 | spin_lock_bh(&mm->context.list_lock); |
230 | if (page->flags & FRAG_MASK) { | 233 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
231 | /* Page now has some free pgtable fragments. */ | ||
232 | list_move(&page->lru, &mm->context.pgtable_list); | ||
233 | page = NULL; | ||
234 | } else | ||
235 | /* All fragments of the 4K page have been freed. */ | ||
236 | list_del(&page->lru); | 234 | list_del(&page->lru); |
237 | spin_unlock(&mm->context.list_lock); | 235 | mask = atomic_xor_bits(&page->_mapcount, bit); |
238 | if (page) { | 236 | if (mask & FRAG_MASK) |
237 | list_add(&page->lru, &mm->context.pgtable_list); | ||
238 | spin_unlock_bh(&mm->context.list_lock); | ||
239 | if (mask == 0) { | ||
239 | pgtable_page_dtor(page); | 240 | pgtable_page_dtor(page); |
241 | atomic_set(&page->_mapcount, -1); | ||
240 | __free_page(page); | 242 | __free_page(page); |
241 | } | 243 | } |
242 | } | 244 | } |
243 | 245 | ||
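The free path in page_table_free() recovers which fragment a table address belongs to from its offset inside the 4K page, clears that in-use bit, re-adds the page to pgtable_list if it now has a free fragment, and frees the page outright once the mask reaches zero. The bit computation, isolated as a sketch with assumed constants:

    /* Which fragment bit does a table address map to?
     * Mirrors: 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE * sizeof(pte_t))) */
    #define FRAG_SIZE 2048UL        /* 256 ptes * 8 bytes on 64-bit */

    unsigned int frag_bit(unsigned long table)
    {
            return 1u << ((table & 4095UL) / FRAG_SIZE);
    }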
244 | void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) | 246 | #ifdef CONFIG_HAVE_RCU_TABLE_FREE |
247 | |||
248 | static void __page_table_free_rcu(void *table, unsigned bit) | ||
245 | { | 249 | { |
246 | struct page *page; | 250 | struct page *page; |
247 | 251 | ||
248 | spin_lock(&mm->context.list_lock); | 252 | #ifdef CONFIG_PGSTE |
249 | /* Free shadow region and segment tables. */ | 253 | if (bit == FRAG_MASK) |
250 | list_for_each_entry(page, &mm->context.crst_list, lru) | 254 | return page_table_free_pgste(table); |
251 | if (page->index) { | 255 | #endif |
252 | free_pages((unsigned long) page->index, ALLOC_ORDER); | 256 | /* Free 1K/2K page table fragment of a 4K page */ |
253 | page->index = 0; | 257 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
254 | } | 258 | if (atomic_xor_bits(&page->_mapcount, bit) == 0) { |
255 | /* "Free" second halves of page tables. */ | 259 | pgtable_page_dtor(page); |
256 | list_for_each_entry(page, &mm->context.pgtable_list, lru) | 260 | atomic_set(&page->_mapcount, -1); |
257 | page->flags &= ~SECOND_HALVES; | 261 | __free_page(page); |
258 | spin_unlock(&mm->context.list_lock); | 262 | } |
259 | mm->context.noexec = 0; | ||
260 | update_mm(mm, tsk); | ||
261 | } | 263 | } |
262 | 264 | ||
265 | void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) | ||
266 | { | ||
267 | struct mm_struct *mm; | ||
268 | struct page *page; | ||
269 | unsigned int bit, mask; | ||
270 | |||
271 | mm = tlb->mm; | ||
272 | #ifdef CONFIG_PGSTE | ||
273 | if (mm_has_pgste(mm)) { | ||
274 | table = (unsigned long *) (__pa(table) | FRAG_MASK); | ||
275 | tlb_remove_table(tlb, table); | ||
276 | return; | ||
277 | } | ||
278 | #endif | ||
279 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); | ||
280 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
281 | spin_lock_bh(&mm->context.list_lock); | ||
282 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | ||
283 | list_del(&page->lru); | ||
284 | mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); | ||
285 | if (mask & FRAG_MASK) | ||
286 | list_add_tail(&page->lru, &mm->context.pgtable_list); | ||
287 | spin_unlock_bh(&mm->context.list_lock); | ||
288 | table = (unsigned long *) (__pa(table) | (bit << 4)); | ||
289 | tlb_remove_table(tlb, table); | ||
290 | } | ||
291 | |||
292 | void __tlb_remove_table(void *_table) | ||
293 | { | ||
294 | void *table = (void *)((unsigned long) _table & PAGE_MASK); | ||
295 | unsigned type = (unsigned long) _table & ~PAGE_MASK; | ||
296 | |||
297 | if (type) | ||
298 | __page_table_free_rcu(table, type); | ||
299 | else | ||
300 | free_pages((unsigned long) table, ALLOC_ORDER); | ||
301 | } | ||
302 | |||
303 | #endif | ||
304 | |||
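The RCU path defers the real free until after a grace period, so page_table_free_rcu() has to tell the eventual __tlb_remove_table() callback what it queued. It does so by packing a small type into the low bits of the page-aligned table address: 0 means a full crst table (freed with free_pages), FRAG_MASK marks a PGSTE page table, and a 1K/2K fragment carries its pending-free bit (bit << 4). Setting bit << 4 in _mapcount at queue time is also what keeps the fragment "busy" until the callback runs, matching the "mask >> 4" check in page_table_alloc(). The low-bit tagging trick on an aligned pointer, as a standalone sketch with assumed names:

    #include <assert.h>
    #include <stdint.h>

    #define PT_PAGE_MASK (~(uintptr_t)4095)

    /* Pack a small tag into the unused low bits of a page-aligned pointer. */
    static void *tag_pack(void *aligned, unsigned int tag)
    {
            assert(((uintptr_t)aligned & ~PT_PAGE_MASK) == 0 && tag <= 4095);
            return (void *)((uintptr_t)aligned | tag);
    }

    /* Recover both the original pointer and the tag. */
    static void *tag_unpack(void *packed, unsigned int *tag)
    {
            *tag = (unsigned int)((uintptr_t)packed & ~PT_PAGE_MASK);
            return (void *)((uintptr_t)packed & PT_PAGE_MASK);
    }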
263 | /* | 305 | /* |
264 | * switch on pgstes for its userspace process (for kvm) | 306 | * switch on pgstes for its userspace process (for kvm) |
265 | */ | 307 | */ |
@@ -273,7 +315,7 @@ int s390_enable_sie(void) | |||
273 | return -EINVAL; | 315 | return -EINVAL; |
274 | 316 | ||
275 | /* Do we have pgstes? if yes, we are done */ | 317 | /* Do we have pgstes? if yes, we are done */ |
276 | if (tsk->mm->context.has_pgste) | 318 | if (mm_has_pgste(tsk->mm)) |
277 | return 0; | 319 | return 0; |
278 | 320 | ||
279 | /* lets check if we are allowed to replace the mm */ | 321 | /* lets check if we are allowed to replace the mm */ |
@@ -312,6 +354,8 @@ int s390_enable_sie(void) | |||
312 | tsk->mm = tsk->active_mm = mm; | 354 | tsk->mm = tsk->active_mm = mm; |
313 | preempt_disable(); | 355 | preempt_disable(); |
314 | update_mm(mm, tsk); | 356 | update_mm(mm, tsk); |
357 | atomic_inc(&mm->context.attach_count); | ||
358 | atomic_dec(&old_mm->context.attach_count); | ||
315 | cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); | 359 | cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); |
316 | preempt_enable(); | 360 | preempt_enable(); |
317 | task_unlock(tsk); | 361 | task_unlock(tsk); |
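The added attach_count updates keep the per-mm CPU-attach counter consistent when s390_enable_sie() swaps the task over to the freshly built mm with PGSTEs; that counter is what the s390 TLB-flush code consults to decide whether a local flush is enough, so it has to follow the task from the old mm to the new one.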
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 34c43f23b28c..8c1970d1dd91 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c | |||
@@ -95,7 +95,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
95 | pu_dir = vmem_pud_alloc(); | 95 | pu_dir = vmem_pud_alloc(); |
96 | if (!pu_dir) | 96 | if (!pu_dir) |
97 | goto out; | 97 | goto out; |
98 | pgd_populate_kernel(&init_mm, pg_dir, pu_dir); | 98 | pgd_populate(&init_mm, pg_dir, pu_dir); |
99 | } | 99 | } |
100 | 100 | ||
101 | pu_dir = pud_offset(pg_dir, address); | 101 | pu_dir = pud_offset(pg_dir, address); |
@@ -103,7 +103,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
103 | pm_dir = vmem_pmd_alloc(); | 103 | pm_dir = vmem_pmd_alloc(); |
104 | if (!pm_dir) | 104 | if (!pm_dir) |
105 | goto out; | 105 | goto out; |
106 | pud_populate_kernel(&init_mm, pu_dir, pm_dir); | 106 | pud_populate(&init_mm, pu_dir, pm_dir); |
107 | } | 107 | } |
108 | 108 | ||
109 | pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); | 109 | pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); |
@@ -123,7 +123,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
123 | pt_dir = vmem_pte_alloc(); | 123 | pt_dir = vmem_pte_alloc(); |
124 | if (!pt_dir) | 124 | if (!pt_dir) |
125 | goto out; | 125 | goto out; |
126 | pmd_populate_kernel(&init_mm, pm_dir, pt_dir); | 126 | pmd_populate(&init_mm, pm_dir, pt_dir); |
127 | } | 127 | } |
128 | 128 | ||
129 | pt_dir = pte_offset_kernel(pm_dir, address); | 129 | pt_dir = pte_offset_kernel(pm_dir, address); |
@@ -159,7 +159,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) | |||
159 | continue; | 159 | continue; |
160 | 160 | ||
161 | if (pmd_huge(*pm_dir)) { | 161 | if (pmd_huge(*pm_dir)) { |
162 | pmd_clear_kernel(pm_dir); | 162 | pmd_clear(pm_dir); |
163 | address += HPAGE_SIZE - PAGE_SIZE; | 163 | address += HPAGE_SIZE - PAGE_SIZE; |
164 | continue; | 164 | continue; |
165 | } | 165 | } |
@@ -192,7 +192,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | |||
192 | pu_dir = vmem_pud_alloc(); | 192 | pu_dir = vmem_pud_alloc(); |
193 | if (!pu_dir) | 193 | if (!pu_dir) |
194 | goto out; | 194 | goto out; |
195 | pgd_populate_kernel(&init_mm, pg_dir, pu_dir); | 195 | pgd_populate(&init_mm, pg_dir, pu_dir); |
196 | } | 196 | } |
197 | 197 | ||
198 | pu_dir = pud_offset(pg_dir, address); | 198 | pu_dir = pud_offset(pg_dir, address); |
@@ -200,7 +200,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | |||
200 | pm_dir = vmem_pmd_alloc(); | 200 | pm_dir = vmem_pmd_alloc(); |
201 | if (!pm_dir) | 201 | if (!pm_dir) |
202 | goto out; | 202 | goto out; |
203 | pud_populate_kernel(&init_mm, pu_dir, pm_dir); | 203 | pud_populate(&init_mm, pu_dir, pm_dir); |
204 | } | 204 | } |
205 | 205 | ||
206 | pm_dir = pmd_offset(pu_dir, address); | 206 | pm_dir = pmd_offset(pu_dir, address); |
@@ -208,7 +208,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | |||
208 | pt_dir = vmem_pte_alloc(); | 208 | pt_dir = vmem_pte_alloc(); |
209 | if (!pt_dir) | 209 | if (!pt_dir) |
210 | goto out; | 210 | goto out; |
211 | pmd_populate_kernel(&init_mm, pm_dir, pt_dir); | 211 | pmd_populate(&init_mm, pm_dir, pt_dir); |
212 | } | 212 | } |
213 | 213 | ||
214 | pt_dir = pte_offset_kernel(pm_dir, address); | 214 | pt_dir = pte_offset_kernel(pm_dir, address); |
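The vmem.c changes are mechanical: pgd/pud/pmd_populate_kernel() and pmd_clear_kernel() are replaced by the plain pgd/pud/pmd_populate() and pmd_clear(). With the execute-protect shadow tables removed elsewhere in this merge, there is no longer a separate "kernel" flavour that skips the shadow-table update, so the distinction between the two sets of helpers disappears.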