Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/Makefile         3
-rw-r--r--  arch/s390/mm/cmm.c            9
-rw-r--r--  arch/s390/mm/extmem.c         6
-rw-r--r--  arch/s390/mm/fault.c        321
-rw-r--r--  arch/s390/mm/gup.c          224
-rw-r--r--  arch/s390/mm/hugetlbpage.c   12
-rw-r--r--  arch/s390/mm/init.c          60
-rw-r--r--  arch/s390/mm/maccess.c        8
-rw-r--r--  arch/s390/mm/mmap.c          49
-rw-r--r--  arch/s390/mm/pageattr.c      61
-rw-r--r--  arch/s390/mm/pgtable.c      238
-rw-r--r--  arch/s390/mm/vmem.c          14
12 files changed, 712 insertions, 293 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index eec054484419..d98fe9004a52 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -3,6 +3,7 @@
3# 3#
4 4
5obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ 5obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
6 page-states.o 6 page-states.o gup.o
7obj-$(CONFIG_CMM) += cmm.o 7obj-$(CONFIG_CMM) += cmm.o
8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
9obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index a9550dca3e4b..1f1dba9dcf58 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -23,7 +23,10 @@
23#include <asm/pgalloc.h> 23#include <asm/pgalloc.h>
24#include <asm/diag.h> 24#include <asm/diag.h>
25 25
26static char *sender = "VMRMSVM"; 26#ifdef CONFIG_CMM_IUCV
27static char *cmm_default_sender = "VMRMSVM";
28#endif
29static char *sender;
27module_param(sender, charp, 0400); 30module_param(sender, charp, 0400);
28MODULE_PARM_DESC(sender, 31MODULE_PARM_DESC(sender,
29 "Guest name that may send SMSG messages (default VMRMSVM)"); 32 "Guest name that may send SMSG messages (default VMRMSVM)");
@@ -88,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter,
88 } else 91 } else
89 free_page((unsigned long) npa); 92 free_page((unsigned long) npa);
90 } 93 }
91 diag10(addr); 94 diag10_range(addr >> PAGE_SHIFT, 1);
92 pa->pages[pa->index++] = addr; 95 pa->pages[pa->index++] = addr;
93 (*counter)++; 96 (*counter)++;
94 spin_unlock(&cmm_lock); 97 spin_unlock(&cmm_lock);
@@ -440,6 +443,8 @@ static int __init cmm_init(void)
440 int len = strlen(sender); 443 int len = strlen(sender);
441 while (len--) 444 while (len--)
442 sender[len] = toupper(sender[len]); 445 sender[len] = toupper(sender[len]);
446 } else {
447 sender = cmm_default_sender;
443 } 448 }
444 449
445 rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); 450 rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 3cc95dd0a3a6..075ddada4911 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -412,6 +412,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
412 struct dcss_segment *seg; 412 struct dcss_segment *seg;
413 int rc, diag_cc; 413 int rc, diag_cc;
414 414
415 start_addr = end_addr = 0;
415 seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); 416 seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
416 if (seg == NULL) { 417 if (seg == NULL) {
417 rc = -ENOMEM; 418 rc = -ENOMEM;
@@ -573,6 +574,7 @@ segment_modify_shared (char *name, int do_nonshared)
573 unsigned long start_addr, end_addr, dummy; 574 unsigned long start_addr, end_addr, dummy;
574 int rc, diag_cc; 575 int rc, diag_cc;
575 576
577 start_addr = end_addr = 0;
576 mutex_lock(&dcss_lock); 578 mutex_lock(&dcss_lock);
577 seg = segment_by_name (name); 579 seg = segment_by_name (name);
578 if (seg == NULL) { 580 if (seg == NULL) {
@@ -681,8 +683,6 @@ void
681segment_save(char *name) 683segment_save(char *name)
682{ 684{
683 struct dcss_segment *seg; 685 struct dcss_segment *seg;
684 int startpfn = 0;
685 int endpfn = 0;
686 char cmd1[160]; 686 char cmd1[160];
687 char cmd2[80]; 687 char cmd2[80];
688 int i, response; 688 int i, response;
@@ -698,8 +698,6 @@ segment_save(char *name)
698 goto out; 698 goto out;
699 } 699 }
700 700
701 startpfn = seg->start_addr >> PAGE_SHIFT;
702 endpfn = (seg->end) >> PAGE_SHIFT;
703 sprintf(cmd1, "DEFSEG %s", name); 701 sprintf(cmd1, "DEFSEG %s", name);
704 for (i=0; i<seg->segcnt; i++) { 702 for (i=0; i<seg->segcnt; i++) {
705 sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", 703 sprintf(cmd1+strlen(cmd1), " %lX-%lX %s",
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2505b2ea0ef1..fe103e891e7a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -10,6 +10,7 @@
10 * Copyright (C) 1995 Linus Torvalds 10 * Copyright (C) 1995 Linus Torvalds
11 */ 11 */
12 12
13#include <linux/kernel_stat.h>
13#include <linux/perf_event.h> 14#include <linux/perf_event.h>
14#include <linux/signal.h> 15#include <linux/signal.h>
15#include <linux/sched.h> 16#include <linux/sched.h>
@@ -33,7 +34,7 @@
33#include <asm/asm-offsets.h> 34#include <asm/asm-offsets.h>
34#include <asm/system.h> 35#include <asm/system.h>
35#include <asm/pgtable.h> 36#include <asm/pgtable.h>
36#include <asm/s390_ext.h> 37#include <asm/irq.h>
37#include <asm/mmu_context.h> 38#include <asm/mmu_context.h>
38#include <asm/compat.h> 39#include <asm/compat.h>
39#include "../kernel/entry.h" 40#include "../kernel/entry.h"
@@ -52,6 +53,14 @@
52#define VM_FAULT_BADMAP 0x020000 53#define VM_FAULT_BADMAP 0x020000
53#define VM_FAULT_BADACCESS 0x040000 54#define VM_FAULT_BADACCESS 0x040000
54 55
56static unsigned long store_indication;
57
58void fault_init(void)
59{
60 if (test_facility(2) && test_facility(75))
61 store_indication = 0xc00;
62}
63
55static inline int notify_page_fault(struct pt_regs *regs) 64static inline int notify_page_fault(struct pt_regs *regs)
56{ 65{
57 int ret = 0; 66 int ret = 0;
@@ -199,42 +208,22 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code,
199 unsigned long trans_exc_code) 208 unsigned long trans_exc_code)
200{ 209{
201 struct task_struct *tsk = current; 210 struct task_struct *tsk = current;
211 unsigned long address;
212 struct siginfo si;
202 213
203 /* 214 /*
204 * Send a sigbus, regardless of whether we were in kernel 215 * Send a sigbus, regardless of whether we were in kernel
205 * or user mode. 216 * or user mode.
206 */ 217 */
207 tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; 218 address = trans_exc_code & __FAIL_ADDR_MASK;
219 tsk->thread.prot_addr = address;
208 tsk->thread.trap_no = int_code; 220 tsk->thread.trap_no = int_code;
209 force_sig(SIGBUS, tsk); 221 si.si_signo = SIGBUS;
210} 222 si.si_errno = 0;
211 223 si.si_code = BUS_ADRERR;
212#ifdef CONFIG_S390_EXEC_PROTECT 224 si.si_addr = (void __user *) address;
213static noinline int signal_return(struct pt_regs *regs, long int_code, 225 force_sig_info(SIGBUS, &si, tsk);
214 unsigned long trans_exc_code)
215{
216 u16 instruction;
217 int rc;
218
219 rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
220
221 if (!rc && instruction == 0x0a77) {
222 clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
223 if (is_compat_task())
224 sys32_sigreturn();
225 else
226 sys_sigreturn();
227 } else if (!rc && instruction == 0x0aad) {
228 clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
229 if (is_compat_task())
230 sys32_rt_sigreturn();
231 else
232 sys_rt_sigreturn();
233 } else
234 do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code);
235 return 0;
236} 226}
237#endif /* CONFIG_S390_EXEC_PROTECT */
238 227
239static noinline void do_fault_error(struct pt_regs *regs, long int_code, 228static noinline void do_fault_error(struct pt_regs *regs, long int_code,
240 unsigned long trans_exc_code, int fault) 229 unsigned long trans_exc_code, int fault)
@@ -243,13 +232,6 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code,
243 232
244 switch (fault) { 233 switch (fault) {
245 case VM_FAULT_BADACCESS: 234 case VM_FAULT_BADACCESS:
246#ifdef CONFIG_S390_EXEC_PROTECT
247 if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
248 (trans_exc_code & 3) == 0) {
249 signal_return(regs, int_code, trans_exc_code);
250 break;
251 }
252#endif /* CONFIG_S390_EXEC_PROTECT */
253 case VM_FAULT_BADMAP: 235 case VM_FAULT_BADMAP:
254 /* Bad memory access. Check if it is kernel or user space. */ 236 /* Bad memory access. Check if it is kernel or user space. */
255 if (regs->psw.mask & PSW_MASK_PSTATE) { 237 if (regs->psw.mask & PSW_MASK_PSTATE) {
@@ -263,13 +245,17 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code,
263 do_no_context(regs, int_code, trans_exc_code); 245 do_no_context(regs, int_code, trans_exc_code);
264 break; 246 break;
265 default: /* fault & VM_FAULT_ERROR */ 247 default: /* fault & VM_FAULT_ERROR */
266 if (fault & VM_FAULT_OOM) 248 if (fault & VM_FAULT_OOM) {
267 pagefault_out_of_memory(); 249 if (!(regs->psw.mask & PSW_MASK_PSTATE))
268 else if (fault & VM_FAULT_SIGBUS) { 250 do_no_context(regs, int_code, trans_exc_code);
269 do_sigbus(regs, int_code, trans_exc_code); 251 else
252 pagefault_out_of_memory();
253 } else if (fault & VM_FAULT_SIGBUS) {
270 /* Kernel mode? Handle exceptions or die */ 254 /* Kernel mode? Handle exceptions or die */
271 if (!(regs->psw.mask & PSW_MASK_PSTATE)) 255 if (!(regs->psw.mask & PSW_MASK_PSTATE))
272 do_no_context(regs, int_code, trans_exc_code); 256 do_no_context(regs, int_code, trans_exc_code);
257 else
258 do_sigbus(regs, int_code, trans_exc_code);
273 } else 259 } else
274 BUG(); 260 BUG();
275 break; 261 break;
@@ -294,6 +280,7 @@ static inline int do_exception(struct pt_regs *regs, int access,
294 struct mm_struct *mm; 280 struct mm_struct *mm;
295 struct vm_area_struct *vma; 281 struct vm_area_struct *vma;
296 unsigned long address; 282 unsigned long address;
283 unsigned int flags;
297 int fault; 284 int fault;
298 285
299 if (notify_page_fault(regs)) 286 if (notify_page_fault(regs))
@@ -312,13 +299,11 @@ static inline int do_exception(struct pt_regs *regs, int access,
312 goto out; 299 goto out;
313 300
314 address = trans_exc_code & __FAIL_ADDR_MASK; 301 address = trans_exc_code & __FAIL_ADDR_MASK;
315 /*
316 * When we get here, the fault happened in the current
317 * task's user address space, so we can switch on the
318 * interrupts again and then search the VMAs
319 */
320 local_irq_enable();
321 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); 302 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
303 flags = FAULT_FLAG_ALLOW_RETRY;
304 if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
305 flags |= FAULT_FLAG_WRITE;
306retry:
322 down_read(&mm->mmap_sem); 307 down_read(&mm->mmap_sem);
323 308
324 fault = VM_FAULT_BADMAP; 309 fault = VM_FAULT_BADMAP;
@@ -348,25 +333,37 @@ static inline int do_exception(struct pt_regs *regs, int access,
348 * make sure we exit gracefully rather than endlessly redo 333 * make sure we exit gracefully rather than endlessly redo
349 * the fault. 334 * the fault.
350 */ 335 */
351 fault = handle_mm_fault(mm, vma, address, 336 fault = handle_mm_fault(mm, vma, address, flags);
352 (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0);
353 if (unlikely(fault & VM_FAULT_ERROR)) 337 if (unlikely(fault & VM_FAULT_ERROR))
354 goto out_up; 338 goto out_up;
355 339
356 if (fault & VM_FAULT_MAJOR) { 340 /*
357 tsk->maj_flt++; 341 * Major/minor page fault accounting is only done on the
358 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, 342 * initial attempt. If we go through a retry, it is extremely
359 regs, address); 343 * likely that the page will be found in page cache at that point.
360 } else { 344 */
361 tsk->min_flt++; 345 if (flags & FAULT_FLAG_ALLOW_RETRY) {
362 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, 346 if (fault & VM_FAULT_MAJOR) {
363 regs, address); 347 tsk->maj_flt++;
348 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
349 regs, address);
350 } else {
351 tsk->min_flt++;
352 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
353 regs, address);
354 }
355 if (fault & VM_FAULT_RETRY) {
356 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
357 * of starvation. */
358 flags &= ~FAULT_FLAG_ALLOW_RETRY;
359 goto retry;
360 }
364 } 361 }
365 /* 362 /*
366 * The instruction that caused the program check will 363 * The instruction that caused the program check will
367 * be repeated. Don't signal single step via SIGTRAP. 364 * be repeated. Don't signal single step via SIGTRAP.
368 */ 365 */
369 clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP); 366 clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
370 fault = 0; 367 fault = 0;
371out_up: 368out_up:
372 up_read(&mm->mmap_sem); 369 up_read(&mm->mmap_sem);
@@ -374,20 +371,20 @@ out:
374 return fault; 371 return fault;
375} 372}
376 373
377void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) 374void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code,
375 unsigned long trans_exc_code)
378{ 376{
379 unsigned long trans_exc_code = S390_lowcore.trans_exc_code;
380 int fault; 377 int fault;
381 378
382 /* Protection exception is supressing, decrement psw address. */ 379 /* Protection exception is suppressing, decrement psw address. */
383 regs->psw.addr -= (int_code >> 16); 380 regs->psw.addr -= (pgm_int_code >> 16);
384 /* 381 /*
385 * Check for low-address protection. This needs to be treated 382 * Check for low-address protection. This needs to be treated
386 * as a special case because the translation exception code 383 * as a special case because the translation exception code
387 * field is not guaranteed to contain valid data in this case. 384 * field is not guaranteed to contain valid data in this case.
388 */ 385 */
389 if (unlikely(!(trans_exc_code & 4))) { 386 if (unlikely(!(trans_exc_code & 4))) {
390 do_low_address(regs, int_code, trans_exc_code); 387 do_low_address(regs, pgm_int_code, trans_exc_code);
391 return; 388 return;
392 } 389 }
393 fault = do_exception(regs, VM_WRITE, trans_exc_code); 390 fault = do_exception(regs, VM_WRITE, trans_exc_code);
@@ -395,34 +392,27 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long int_code)
395 do_fault_error(regs, 4, trans_exc_code, fault); 392 do_fault_error(regs, 4, trans_exc_code, fault);
396} 393}
397 394
398void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) 395void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code,
396 unsigned long trans_exc_code)
399{ 397{
400 unsigned long trans_exc_code = S390_lowcore.trans_exc_code;
401 int access, fault; 398 int access, fault;
402 399
403 access = VM_READ | VM_EXEC | VM_WRITE; 400 access = VM_READ | VM_EXEC | VM_WRITE;
404#ifdef CONFIG_S390_EXEC_PROTECT
405 if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
406 (trans_exc_code & 3) == 0)
407 access = VM_EXEC;
408#endif
409 fault = do_exception(regs, access, trans_exc_code); 401 fault = do_exception(regs, access, trans_exc_code);
410 if (unlikely(fault)) 402 if (unlikely(fault))
411 do_fault_error(regs, int_code & 255, trans_exc_code, fault); 403 do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault);
412} 404}
413 405
414#ifdef CONFIG_64BIT 406#ifdef CONFIG_64BIT
415void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) 407void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code,
408 unsigned long trans_exc_code)
416{ 409{
417 unsigned long trans_exc_code = S390_lowcore.trans_exc_code;
418 struct mm_struct *mm = current->mm; 410 struct mm_struct *mm = current->mm;
419 struct vm_area_struct *vma; 411 struct vm_area_struct *vma;
420 412
421 if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) 413 if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
422 goto no_context; 414 goto no_context;
423 415
424 local_irq_enable();
425
426 down_read(&mm->mmap_sem); 416 down_read(&mm->mmap_sem);
427 vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); 417 vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK);
428 up_read(&mm->mmap_sem); 418 up_read(&mm->mmap_sem);
@@ -434,16 +424,16 @@ void __kprobes do_asce_exception(struct pt_regs *regs, long int_code)
434 424
435 /* User mode accesses just cause a SIGSEGV */ 425 /* User mode accesses just cause a SIGSEGV */
436 if (regs->psw.mask & PSW_MASK_PSTATE) { 426 if (regs->psw.mask & PSW_MASK_PSTATE) {
437 do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); 427 do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code);
438 return; 428 return;
439 } 429 }
440 430
441no_context: 431no_context:
442 do_no_context(regs, int_code, trans_exc_code); 432 do_no_context(regs, pgm_int_code, trans_exc_code);
443} 433}
444#endif 434#endif
445 435
446int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) 436int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
447{ 437{
448 struct pt_regs regs; 438 struct pt_regs regs;
449 int access, fault; 439 int access, fault;
@@ -454,14 +444,13 @@ int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user)
454 regs.psw.addr = (unsigned long) __builtin_return_address(0); 444 regs.psw.addr = (unsigned long) __builtin_return_address(0);
455 regs.psw.addr |= PSW_ADDR_AMODE; 445 regs.psw.addr |= PSW_ADDR_AMODE;
456 uaddr &= PAGE_MASK; 446 uaddr &= PAGE_MASK;
457 access = write_user ? VM_WRITE : VM_READ; 447 access = write ? VM_WRITE : VM_READ;
458 fault = do_exception(&regs, access, uaddr | 2); 448 fault = do_exception(&regs, access, uaddr | 2);
459 if (unlikely(fault)) { 449 if (unlikely(fault)) {
460 if (fault & VM_FAULT_OOM) { 450 if (fault & VM_FAULT_OOM)
461 pagefault_out_of_memory(); 451 return -EFAULT;
462 fault = 0; 452 else if (fault & VM_FAULT_SIGBUS)
463 } else if (fault & VM_FAULT_SIGBUS) 453 do_sigbus(&regs, pgm_int_code, uaddr);
464 do_sigbus(&regs, int_code, uaddr);
465 } 454 }
466 return fault ? -EFAULT : 0; 455 return fault ? -EFAULT : 0;
467} 456}
@@ -470,8 +459,7 @@ int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user)
470/* 459/*
471 * 'pfault' pseudo page faults routines. 460 * 'pfault' pseudo page faults routines.
472 */ 461 */
473static ext_int_info_t ext_int_pfault; 462static int pfault_disable;
474static int pfault_disable = 0;
475 463
476static int __init nopfault(char *str) 464static int __init nopfault(char *str)
477{ 465{
@@ -481,22 +469,28 @@ static int __init nopfault(char *str)
481 469
482__setup("nopfault", nopfault); 470__setup("nopfault", nopfault);
483 471
484typedef struct { 472struct pfault_refbk {
485 __u16 refdiagc; 473 u16 refdiagc;
486 __u16 reffcode; 474 u16 reffcode;
487 __u16 refdwlen; 475 u16 refdwlen;
488 __u16 refversn; 476 u16 refversn;
489 __u64 refgaddr; 477 u64 refgaddr;
490 __u64 refselmk; 478 u64 refselmk;
491 __u64 refcmpmk; 479 u64 refcmpmk;
492 __u64 reserved; 480 u64 reserved;
493} __attribute__ ((packed, aligned(8))) pfault_refbk_t; 481} __attribute__ ((packed, aligned(8)));
494 482
495int pfault_init(void) 483int pfault_init(void)
496{ 484{
497 pfault_refbk_t refbk = 485 struct pfault_refbk refbk = {
498 { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, 486 .refdiagc = 0x258,
499 __PF_RES_FIELD }; 487 .reffcode = 0,
488 .refdwlen = 5,
489 .refversn = 2,
490 .refgaddr = __LC_CURRENT_PID,
491 .refselmk = 1ULL << 48,
492 .refcmpmk = 1ULL << 48,
493 .reserved = __PF_RES_FIELD };
500 int rc; 494 int rc;
501 495
502 if (!MACHINE_IS_VM || pfault_disable) 496 if (!MACHINE_IS_VM || pfault_disable)
@@ -508,18 +502,20 @@ int pfault_init(void)
508 "2:\n" 502 "2:\n"
509 EX_TABLE(0b,1b) 503 EX_TABLE(0b,1b)
510 : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); 504 : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
511 __ctl_set_bit(0, 9);
512 return rc; 505 return rc;
513} 506}
514 507
515void pfault_fini(void) 508void pfault_fini(void)
516{ 509{
517 pfault_refbk_t refbk = 510 struct pfault_refbk refbk = {
518 { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; 511 .refdiagc = 0x258,
512 .reffcode = 1,
513 .refdwlen = 5,
514 .refversn = 2,
515 };
519 516
520 if (!MACHINE_IS_VM || pfault_disable) 517 if (!MACHINE_IS_VM || pfault_disable)
521 return; 518 return;
522 __ctl_clear_bit(0,9);
523 asm volatile( 519 asm volatile(
524 " diag %0,0,0x258\n" 520 " diag %0,0,0x258\n"
525 "0:\n" 521 "0:\n"
@@ -527,10 +523,15 @@ void pfault_fini(void)
527 : : "a" (&refbk), "m" (refbk) : "cc"); 523 : : "a" (&refbk), "m" (refbk) : "cc");
528} 524}
529 525
530static void pfault_interrupt(__u16 int_code) 526static DEFINE_SPINLOCK(pfault_lock);
527static LIST_HEAD(pfault_list);
528
529static void pfault_interrupt(unsigned int ext_int_code,
530 unsigned int param32, unsigned long param64)
531{ 531{
532 struct task_struct *tsk; 532 struct task_struct *tsk;
533 __u16 subcode; 533 __u16 subcode;
534 pid_t pid;
534 535
535 /* 536 /*
536 * Get the external interruption subcode & pfault 537 * Get the external interruption subcode & pfault
@@ -538,63 +539,107 @@ static void pfault_interrupt(__u16 int_code)
538 * in the 'cpu address' field associated with the 539 * in the 'cpu address' field associated with the
539 * external interrupt. 540 * external interrupt.
540 */ 541 */
541 subcode = S390_lowcore.cpu_addr; 542 subcode = ext_int_code >> 16;
542 if ((subcode & 0xff00) != __SUBCODE_MASK) 543 if ((subcode & 0xff00) != __SUBCODE_MASK)
543 return; 544 return;
544 545 kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
545 /* 546 if (subcode & 0x0080) {
546 * Get the token (= address of the task structure of the affected task). 547 /* Get the token (= pid of the affected task). */
547 */ 548 pid = sizeof(void *) == 4 ? param32 : param64;
548 tsk = *(struct task_struct **) __LC_PFAULT_INTPARM; 549 rcu_read_lock();
549 550 tsk = find_task_by_pid_ns(pid, &init_pid_ns);
551 if (tsk)
552 get_task_struct(tsk);
553 rcu_read_unlock();
554 if (!tsk)
555 return;
556 } else {
557 tsk = current;
558 }
559 spin_lock(&pfault_lock);
550 if (subcode & 0x0080) { 560 if (subcode & 0x0080) {
551 /* signal bit is set -> a page has been swapped in by VM */ 561 /* signal bit is set -> a page has been swapped in by VM */
552 if (xchg(&tsk->thread.pfault_wait, -1) != 0) { 562 if (tsk->thread.pfault_wait == 1) {
553 /* Initial interrupt was faster than the completion 563 /* Initial interrupt was faster than the completion
554 * interrupt. pfault_wait is valid. Set pfault_wait 564 * interrupt. pfault_wait is valid. Set pfault_wait
555 * back to zero and wake up the process. This can 565 * back to zero and wake up the process. This can
556 * safely be done because the task is still sleeping 566 * safely be done because the task is still sleeping
557 * and can't produce new pfaults. */ 567 * and can't produce new pfaults. */
558 tsk->thread.pfault_wait = 0; 568 tsk->thread.pfault_wait = 0;
569 list_del(&tsk->thread.list);
559 wake_up_process(tsk); 570 wake_up_process(tsk);
560 put_task_struct(tsk); 571 } else {
572 /* Completion interrupt was faster than initial
573 * interrupt. Set pfault_wait to -1 so the initial
574 * interrupt doesn't put the task to sleep. */
575 tsk->thread.pfault_wait = -1;
561 } 576 }
577 put_task_struct(tsk);
562 } else { 578 } else {
563 /* signal bit not set -> a real page is missing. */ 579 /* signal bit not set -> a real page is missing. */
564 get_task_struct(tsk); 580 if (tsk->thread.pfault_wait == -1) {
565 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
566 if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
567 /* Completion interrupt was faster than the initial 581 /* Completion interrupt was faster than the initial
568 * interrupt (swapped in a -1 for pfault_wait). Set 582 * interrupt (pfault_wait == -1). Set pfault_wait
569 * pfault_wait back to zero and exit. This can be 583 * back to zero and exit. */
570 * done safely because tsk is running in kernel
571 * mode and can't produce new pfaults. */
572 tsk->thread.pfault_wait = 0; 584 tsk->thread.pfault_wait = 0;
573 set_task_state(tsk, TASK_RUNNING); 585 } else {
574 put_task_struct(tsk); 586 /* Initial interrupt arrived before completion
575 } else 587 * interrupt. Let the task sleep. */
588 tsk->thread.pfault_wait = 1;
589 list_add(&tsk->thread.list, &pfault_list);
590 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
576 set_tsk_need_resched(tsk); 591 set_tsk_need_resched(tsk);
592 }
577 } 593 }
594 spin_unlock(&pfault_lock);
578} 595}
579 596
580void __init pfault_irq_init(void) 597static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
598 unsigned long action, void *hcpu)
581{ 599{
582 if (!MACHINE_IS_VM) 600 struct thread_struct *thread, *next;
583 return; 601 struct task_struct *tsk;
584 602
585 /* 603 switch (action) {
586 * Try to get pfault pseudo page faults going. 604 case CPU_DEAD:
587 */ 605 case CPU_DEAD_FROZEN:
588 if (register_early_external_interrupt(0x2603, pfault_interrupt, 606 spin_lock_irq(&pfault_lock);
589 &ext_int_pfault) != 0) 607 list_for_each_entry_safe(thread, next, &pfault_list, list) {
590 panic("Couldn't request external interrupt 0x2603"); 608 thread->pfault_wait = 0;
609 list_del(&thread->list);
610 tsk = container_of(thread, struct task_struct, thread);
611 wake_up_process(tsk);
612 }
613 spin_unlock_irq(&pfault_lock);
614 break;
615 default:
616 break;
617 }
618 return NOTIFY_OK;
619}
591 620
592 if (pfault_init() == 0) 621static int __init pfault_irq_init(void)
593 return; 622{
623 int rc;
594 624
595 /* Tough luck, no pfault. */ 625 if (!MACHINE_IS_VM)
626 return 0;
627 rc = register_external_interrupt(0x2603, pfault_interrupt);
628 if (rc)
629 goto out_extint;
630 rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
631 if (rc)
632 goto out_pfault;
633 service_subclass_irq_register();
634 hotcpu_notifier(pfault_cpu_notify, 0);
635 return 0;
636
637out_pfault:
638 unregister_external_interrupt(0x2603, pfault_interrupt);
639out_extint:
596 pfault_disable = 1; 640 pfault_disable = 1;
597 unregister_early_external_interrupt(0x2603, pfault_interrupt, 641 return rc;
598 &ext_int_pfault);
599} 642}
600#endif 643early_initcall(pfault_irq_init);
644
645#endif /* CONFIG_PFAULT */
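The reworked pfault code above replaces the old xchg()-based handshake with an explicit pfault_wait state (-1, 0, 1) guarded by pfault_lock, so the initial and completion interrupts can arrive in either order. The following is a simplified sketch of the two cases handled by pfault_interrupt(); the function names are illustrative, and locking, task refcounting and subcode decoding are omitted.

	/* Sketch only: pfault_wait is 0 (idle), 1 (waiting for completion)
	 * or -1 (completion arrived before the initial interrupt). */
	static void sketch_completion_interrupt(struct task_struct *tsk)
	{
		if (tsk->thread.pfault_wait == 1) {
			/* initial interrupt won the race: task is asleep, wake it */
			tsk->thread.pfault_wait = 0;
			list_del(&tsk->thread.list);
			wake_up_process(tsk);
		} else {
			/* completion overtook the initial interrupt */
			tsk->thread.pfault_wait = -1;
		}
	}

	static void sketch_initial_interrupt(struct task_struct *tsk)
	{
		if (tsk->thread.pfault_wait == -1) {
			/* completion already arrived: do not sleep at all */
			tsk->thread.pfault_wait = 0;
		} else {
			/* normal case: park the task until the completion interrupt */
			tsk->thread.pfault_wait = 1;
			list_add(&tsk->thread.list, &pfault_list);
			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
			set_tsk_need_resched(tsk);
		}
	}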
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
new file mode 100644
index 000000000000..45b405ca2567
--- /dev/null
+++ b/arch/s390/mm/gup.c
@@ -0,0 +1,224 @@
1/*
2 * Lockless get_user_pages_fast for s390
3 *
4 * Copyright IBM Corp. 2010
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7#include <linux/sched.h>
8#include <linux/mm.h>
9#include <linux/hugetlb.h>
10#include <linux/vmstat.h>
11#include <linux/pagemap.h>
12#include <linux/rwsem.h>
13#include <asm/pgtable.h>
14
15/*
16 * The performance critical leaf functions are made noinline otherwise gcc
17 * inlines everything into a single function which results in too much
18 * register pressure.
19 */
20static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
21 unsigned long end, int write, struct page **pages, int *nr)
22{
23 unsigned long mask;
24 pte_t *ptep, pte;
25 struct page *page;
26
27 mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
28
29 ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
30 do {
31 pte = *ptep;
32 barrier();
33 if ((pte_val(pte) & mask) != 0)
34 return 0;
35 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
36 page = pte_page(pte);
37 if (!page_cache_get_speculative(page))
38 return 0;
39 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
40 put_page(page);
41 return 0;
42 }
43 pages[*nr] = page;
44 (*nr)++;
45
46 } while (ptep++, addr += PAGE_SIZE, addr != end);
47
48 return 1;
49}
50
51static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
52 unsigned long end, int write, struct page **pages, int *nr)
53{
54 unsigned long mask, result;
55 struct page *head, *page;
56 int refs;
57
58 result = write ? 0 : _SEGMENT_ENTRY_RO;
59 mask = result | _SEGMENT_ENTRY_INV;
60 if ((pmd_val(pmd) & mask) != result)
61 return 0;
62 VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
63
64 refs = 0;
65 head = pmd_page(pmd);
66 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
67 do {
68 VM_BUG_ON(compound_head(page) != head);
69 pages[*nr] = page;
70 (*nr)++;
71 page++;
72 refs++;
73 } while (addr += PAGE_SIZE, addr != end);
74
75 if (!page_cache_add_speculative(head, refs)) {
76 *nr -= refs;
77 return 0;
78 }
79
80 if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
81 *nr -= refs;
82 while (refs--)
83 put_page(head);
84 }
85
86 return 1;
87}
88
89
90static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
91 unsigned long end, int write, struct page **pages, int *nr)
92{
93 unsigned long next;
94 pmd_t *pmdp, pmd;
95
96 pmdp = (pmd_t *) pudp;
97#ifdef CONFIG_64BIT
98 if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
99 pmdp = (pmd_t *) pud_deref(pud);
100 pmdp += pmd_index(addr);
101#endif
102 do {
103 pmd = *pmdp;
104 barrier();
105 next = pmd_addr_end(addr, end);
106 if (pmd_none(pmd))
107 return 0;
108 if (unlikely(pmd_huge(pmd))) {
109 if (!gup_huge_pmd(pmdp, pmd, addr, next,
110 write, pages, nr))
111 return 0;
112 } else if (!gup_pte_range(pmdp, pmd, addr, next,
113 write, pages, nr))
114 return 0;
115 } while (pmdp++, addr = next, addr != end);
116
117 return 1;
118}
119
120static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
121 unsigned long end, int write, struct page **pages, int *nr)
122{
123 unsigned long next;
124 pud_t *pudp, pud;
125
126 pudp = (pud_t *) pgdp;
127#ifdef CONFIG_64BIT
128 if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
129 pudp = (pud_t *) pgd_deref(pgd);
130 pudp += pud_index(addr);
131#endif
132 do {
133 pud = *pudp;
134 barrier();
135 next = pud_addr_end(addr, end);
136 if (pud_none(pud))
137 return 0;
138 if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
139 return 0;
140 } while (pudp++, addr = next, addr != end);
141
142 return 1;
143}
144
145/**
146 * get_user_pages_fast() - pin user pages in memory
147 * @start: starting user address
148 * @nr_pages: number of pages from start to pin
149 * @write: whether pages will be written to
150 * @pages: array that receives pointers to the pages pinned.
151 * Should be at least nr_pages long.
152 *
153 * Attempt to pin user pages in memory without taking mm->mmap_sem.
154 * If not successful, it will fall back to taking the lock and
155 * calling get_user_pages().
156 *
157 * Returns number of pages pinned. This may be fewer than the number
158 * requested. If nr_pages is 0 or negative, returns 0. If no pages
159 * were pinned, returns -errno.
160 */
161int get_user_pages_fast(unsigned long start, int nr_pages, int write,
162 struct page **pages)
163{
164 struct mm_struct *mm = current->mm;
165 unsigned long addr, len, end;
166 unsigned long next;
167 pgd_t *pgdp, pgd;
168 int nr = 0;
169
170 start &= PAGE_MASK;
171 addr = start;
172 len = (unsigned long) nr_pages << PAGE_SHIFT;
173 end = start + len;
174 if (end < start)
175 goto slow_irqon;
176
177 /*
178 * local_irq_disable() doesn't prevent pagetable teardown, but does
179 * prevent the pagetables from being freed on s390.
180 *
181 * So long as we atomically load page table pointers versus teardown,
182 * we can follow the address down to the the page and take a ref on it.
183 */
184 local_irq_disable();
185 pgdp = pgd_offset(mm, addr);
186 do {
187 pgd = *pgdp;
188 barrier();
189 next = pgd_addr_end(addr, end);
190 if (pgd_none(pgd))
191 goto slow;
192 if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
193 goto slow;
194 } while (pgdp++, addr = next, addr != end);
195 local_irq_enable();
196
197 VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
198 return nr;
199
200 {
201 int ret;
202slow:
203 local_irq_enable();
204slow_irqon:
205 /* Try to get the remaining pages with get_user_pages */
206 start += nr << PAGE_SHIFT;
207 pages += nr;
208
209 down_read(&mm->mmap_sem);
210 ret = get_user_pages(current, mm, start,
211 (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
212 up_read(&mm->mmap_sem);
213
214 /* Have to be a bit careful with return values */
215 if (nr > 0) {
216 if (ret < 0)
217 ret = nr;
218 else
219 ret += nr;
220 }
221
222 return ret;
223 }
224}
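The kernel-doc above describes the calling convention of the s390 get_user_pages_fast() implementation added here. A minimal caller sketch follows; the function name pin_user_buffer() and the write-access assumption are hypothetical, and error handling is reduced to the essentials.

	/* Sketch only: pin nr user pages starting at uaddr, then release them. */
	static int pin_user_buffer(unsigned long uaddr, int nr)
	{
		struct page **pages;
		int i, pinned;

		pages = kcalloc(nr, sizeof(*pages), GFP_KERNEL);
		if (!pages)
			return -ENOMEM;
		pinned = get_user_pages_fast(uaddr, nr, 1, pages);	/* 1 = write */
		for (i = 0; i < pinned; i++) {
			/* ... access the page contents here ... */
			put_page(pages[i]);	/* drop the reference taken by gup */
		}
		kfree(pages);
		if (pinned < 0)
			return pinned;
		return pinned == nr ? 0 : -EFAULT;
	}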
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f28c43d2f61d..a4d856db9154 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -13,7 +13,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
13 pte_t *pteptr, pte_t pteval) 13 pte_t *pteptr, pte_t pteval)
14{ 14{
15 pmd_t *pmdp = (pmd_t *) pteptr; 15 pmd_t *pmdp = (pmd_t *) pteptr;
16 pte_t shadow_pteval = pteval;
17 unsigned long mask; 16 unsigned long mask;
18 17
19 if (!MACHINE_HAS_HPAGE) { 18 if (!MACHINE_HAS_HPAGE) {
@@ -21,18 +20,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
21 mask = pte_val(pteval) & 20 mask = pte_val(pteval) &
22 (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); 21 (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
23 pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; 22 pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
24 if (mm->context.noexec) {
25 pteptr += PTRS_PER_PTE;
26 pte_val(shadow_pteval) =
27 (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
28 }
29 } 23 }
30 24
31 pmd_val(*pmdp) = pte_val(pteval); 25 pmd_val(*pmdp) = pte_val(pteval);
32 if (mm->context.noexec) {
33 pmdp = get_shadow_table(pmdp);
34 pmd_val(*pmdp) = pte_val(shadow_pteval);
35 }
36} 26}
37 27
38int arch_prepare_hugepage(struct page *page) 28int arch_prepare_hugepage(struct page *page)
@@ -68,7 +58,7 @@ void arch_release_hugepage(struct page *page)
68 ptep = (pte_t *) page[1].index; 58 ptep = (pte_t *) page[1].index;
69 if (!ptep) 59 if (!ptep)
70 return; 60 return;
71 pte_free(&init_mm, ptep); 61 page_table_free(&init_mm, (unsigned long *) ptep);
72 page[1].index = 0; 62 page[1].index = 0;
73} 63}
74 64
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 30eb6d02ddb8..59b663109d90 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -38,19 +38,59 @@
38#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
39#include <asm/sections.h> 39#include <asm/sections.h>
40 40
41DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
42
43pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); 41pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
44 42
45char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); 43unsigned long empty_zero_page, zero_page_mask;
46EXPORT_SYMBOL(empty_zero_page); 44EXPORT_SYMBOL(empty_zero_page);
47 45
46static unsigned long setup_zero_pages(void)
47{
48 struct cpuid cpu_id;
49 unsigned int order;
50 unsigned long size;
51 struct page *page;
52 int i;
53
54 get_cpu_id(&cpu_id);
55 switch (cpu_id.machine) {
56 case 0x9672: /* g5 */
57 case 0x2064: /* z900 */
58 case 0x2066: /* z900 */
59 case 0x2084: /* z990 */
60 case 0x2086: /* z990 */
61 case 0x2094: /* z9-109 */
62 case 0x2096: /* z9-109 */
63 order = 0;
64 break;
65 case 0x2097: /* z10 */
66 case 0x2098: /* z10 */
67 default:
68 order = 2;
69 break;
70 }
71
72 empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
73 if (!empty_zero_page)
74 panic("Out of memory in setup_zero_pages");
75
76 page = virt_to_page((void *) empty_zero_page);
77 split_page(page, order);
78 for (i = 1 << order; i > 0; i--) {
79 SetPageReserved(page);
80 page++;
81 }
82
83 size = PAGE_SIZE << order;
84 zero_page_mask = (size - 1) & PAGE_MASK;
85
86 return 1UL << order;
87}
88
48/* 89/*
49 * paging_init() sets up the page tables 90 * paging_init() sets up the page tables
50 */ 91 */
51void __init paging_init(void) 92void __init paging_init(void)
52{ 93{
53 static const int ssm_mask = 0x04000000L;
54 unsigned long max_zone_pfns[MAX_NR_ZONES]; 94 unsigned long max_zone_pfns[MAX_NR_ZONES];
55 unsigned long pgd_type; 95 unsigned long pgd_type;
56 96
@@ -72,18 +112,17 @@ void __init paging_init(void)
72 __ctl_load(S390_lowcore.kernel_asce, 1, 1); 112 __ctl_load(S390_lowcore.kernel_asce, 1, 1);
73 __ctl_load(S390_lowcore.kernel_asce, 7, 7); 113 __ctl_load(S390_lowcore.kernel_asce, 7, 7);
74 __ctl_load(S390_lowcore.kernel_asce, 13, 13); 114 __ctl_load(S390_lowcore.kernel_asce, 13, 13);
75 __raw_local_irq_ssm(ssm_mask); 115 arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
76 116
77 atomic_set(&init_mm.context.attach_count, 1); 117 atomic_set(&init_mm.context.attach_count, 1);
78 118
79 sparse_memory_present_with_active_regions(MAX_NUMNODES); 119 sparse_memory_present_with_active_regions(MAX_NUMNODES);
80 sparse_init(); 120 sparse_init();
81 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 121 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
82#ifdef CONFIG_ZONE_DMA
83 max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); 122 max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
84#endif
85 max_zone_pfns[ZONE_NORMAL] = max_low_pfn; 123 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
86 free_area_init_nodes(max_zone_pfns); 124 free_area_init_nodes(max_zone_pfns);
125 fault_init();
87} 126}
88 127
89void __init mem_init(void) 128void __init mem_init(void)
@@ -93,14 +132,12 @@ void __init mem_init(void)
93 max_mapnr = num_physpages = max_low_pfn; 132 max_mapnr = num_physpages = max_low_pfn;
94 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); 133 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
95 134
96 /* clear the zero-page */
97 memset(empty_zero_page, 0, PAGE_SIZE);
98
99 /* Setup guest page hinting */ 135 /* Setup guest page hinting */
100 cmma_init(); 136 cmma_init();
101 137
102 /* this will put all low memory onto the freelists */ 138 /* this will put all low memory onto the freelists */
103 totalram_pages += free_all_bootmem(); 139 totalram_pages += free_all_bootmem();
140 totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */
104 141
105 reservedpages = 0; 142 reservedpages = 0;
106 143
@@ -136,7 +173,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
136 pmd = pmd_offset(pud, address); 173 pmd = pmd_offset(pud, address);
137 pte = pte_offset_kernel(pmd, address); 174 pte = pte_offset_kernel(pmd, address);
138 if (!enable) { 175 if (!enable) {
139 ptep_invalidate(&init_mm, address, pte); 176 __ptep_ipte(address, pte);
177 pte_val(*pte) = _PAGE_TYPE_EMPTY;
140 continue; 178 continue;
141 } 179 }
142 *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); 180 *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
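setup_zero_pages() above replaces the single static zero page with a machine-dependent block of 1 << order zero pages, and zero_page_mask is what lets a faulting address be hashed onto one of them (the ZERO_PAGE() consumer is outside this diff, so that use is an assumption). A worked sketch of the arithmetic for the order = 2 case with 4 KB pages:

	/* Sketch only: order = 2 is selected for z10 and newer machines. */
	static unsigned long sketch_zero_page_mask(unsigned int order)
	{
		unsigned long size = PAGE_SIZE << order;	/* order 2: 16 KB  */

		return (size - 1) & PAGE_MASK;			/* order 2: 0x3000 */
	}
	/* (addr & zero_page_mask) then selects one of the four zero pages. */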
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index a8c2af8c650f..51e5cd9b906a 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -19,7 +19,7 @@
19 * using the stura instruction. 19 * using the stura instruction.
20 * Returns the number of bytes copied or -EFAULT. 20 * Returns the number of bytes copied or -EFAULT.
21 */ 21 */
22static long probe_kernel_write_odd(void *dst, void *src, size_t size) 22static long probe_kernel_write_odd(void *dst, const void *src, size_t size)
23{ 23{
24 unsigned long count, aligned; 24 unsigned long count, aligned;
25 int offset, mask; 25 int offset, mask;
@@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size)
45 return rc ? rc : count; 45 return rc ? rc : count;
46} 46}
47 47
48long probe_kernel_write(void *dst, void *src, size_t size) 48long probe_kernel_write(void *dst, const void *src, size_t size)
49{ 49{
50 long copied = 0; 50 long copied = 0;
51 51
@@ -71,7 +71,7 @@ int memcpy_real(void *dest, void *src, size_t count)
71 71
72 if (!count) 72 if (!count)
73 return 0; 73 return 0;
74 flags = __raw_local_irq_stnsm(0xf8UL); 74 flags = __arch_local_irq_stnsm(0xf8UL);
75 asm volatile ( 75 asm volatile (
76 "0: mvcle %1,%2,0x0\n" 76 "0: mvcle %1,%2,0x0\n"
77 "1: jo 0b\n" 77 "1: jo 0b\n"
@@ -82,6 +82,6 @@ int memcpy_real(void *dest, void *src, size_t count)
82 "+d" (_len2), "=m" (*((long *) dest)) 82 "+d" (_len2), "=m" (*((long *) dest))
83 : "m" (*((long *) src)) 83 : "m" (*((long *) src))
84 : "cc", "memory"); 84 : "cc", "memory");
85 __raw_local_irq_ssm(flags); 85 arch_local_irq_restore(flags);
86 return rc; 86 return rc;
87} 87}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 869efbaed3ea..c9a9f7f18188 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -27,17 +27,44 @@
27#include <linux/personality.h> 27#include <linux/personality.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/random.h>
30#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
31#include <asm/compat.h> 32#include <asm/compat.h>
32 33
34static unsigned long stack_maxrandom_size(void)
35{
36 if (!(current->flags & PF_RANDOMIZE))
37 return 0;
38 if (current->personality & ADDR_NO_RANDOMIZE)
39 return 0;
40 return STACK_RND_MASK << PAGE_SHIFT;
41}
42
33/* 43/*
34 * Top of mmap area (just below the process stack). 44 * Top of mmap area (just below the process stack).
35 * 45 *
36 * Leave an at least ~128 MB hole. 46 * Leave at least a ~32 MB hole.
37 */ 47 */
38#define MIN_GAP (128*1024*1024) 48#define MIN_GAP (32*1024*1024)
39#define MAX_GAP (STACK_TOP/6*5) 49#define MAX_GAP (STACK_TOP/6*5)
40 50
51static inline int mmap_is_legacy(void)
52{
53 if (current->personality & ADDR_COMPAT_LAYOUT)
54 return 1;
55 if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
56 return 1;
57 return sysctl_legacy_va_layout;
58}
59
60static unsigned long mmap_rnd(void)
61{
62 if (!(current->flags & PF_RANDOMIZE))
63 return 0;
64 /* 8MB randomization for mmap_base */
65 return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
66}
67
41static inline unsigned long mmap_base(void) 68static inline unsigned long mmap_base(void)
42{ 69{
43 unsigned long gap = rlimit(RLIMIT_STACK); 70 unsigned long gap = rlimit(RLIMIT_STACK);
@@ -46,22 +73,8 @@ static inline unsigned long mmap_base(void)
46 gap = MIN_GAP; 73 gap = MIN_GAP;
47 else if (gap > MAX_GAP) 74 else if (gap > MAX_GAP)
48 gap = MAX_GAP; 75 gap = MAX_GAP;
49 76 gap &= PAGE_MASK;
50 return STACK_TOP - (gap & PAGE_MASK); 77 return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap;
51}
52
53static inline int mmap_is_legacy(void)
54{
55#ifdef CONFIG_64BIT
56 /*
57 * Force standard allocation for 64 bit programs.
58 */
59 if (!is_compat_task())
60 return 1;
61#endif
62 return sysctl_legacy_va_layout ||
63 (current->personality & ADDR_COMPAT_LAYOUT) ||
64 rlimit(RLIMIT_STACK) == RLIM_INFINITY;
65} 78}
66 79
67#ifndef CONFIG_64BIT 80#ifndef CONFIG_64BIT
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
new file mode 100644
index 000000000000..d013ed39743b
--- /dev/null
+++ b/arch/s390/mm/pageattr.c
@@ -0,0 +1,61 @@
1/*
2 * Copyright IBM Corp. 2011
3 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
4 */
5#include <linux/module.h>
6#include <linux/mm.h>
7#include <linux/hugetlb.h>
8#include <asm/pgtable.h>
9
10static void change_page_attr(unsigned long addr, int numpages,
11 pte_t (*set) (pte_t))
12{
13 pte_t *ptep, pte;
14 pmd_t *pmdp;
15 pud_t *pudp;
16 pgd_t *pgdp;
17 int i;
18
19 for (i = 0; i < numpages; i++) {
20 pgdp = pgd_offset(&init_mm, addr);
21 pudp = pud_offset(pgdp, addr);
22 pmdp = pmd_offset(pudp, addr);
23 if (pmd_huge(*pmdp)) {
24 WARN_ON_ONCE(1);
25 continue;
26 }
27 ptep = pte_offset_kernel(pmdp, addr);
28
29 pte = *ptep;
30 pte = set(pte);
31 __ptep_ipte(addr, ptep);
32 *ptep = pte;
33 addr += PAGE_SIZE;
34 }
35}
36
37int set_memory_ro(unsigned long addr, int numpages)
38{
39 change_page_attr(addr, numpages, pte_wrprotect);
40 return 0;
41}
42EXPORT_SYMBOL_GPL(set_memory_ro);
43
44int set_memory_rw(unsigned long addr, int numpages)
45{
46 change_page_attr(addr, numpages, pte_mkwrite);
47 return 0;
48}
49EXPORT_SYMBOL_GPL(set_memory_rw);
50
51/* not possible */
52int set_memory_nx(unsigned long addr, int numpages)
53{
54 return 0;
55}
56EXPORT_SYMBOL_GPL(set_memory_nx);
57
58int set_memory_x(unsigned long addr, int numpages)
59{
60 return 0;
61}
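set_memory_ro() and set_memory_rw() walk the kernel page tables and re-protect each page in place, which is what the new DEBUG_SET_MODULE_RONX entry in the Makefile relies on. A minimal usage sketch follows; the buffer and function names are hypothetical, and the address must be a page-aligned kernel mapping.

	/* Sketch only: write-protect one page of kernel data, then restore it. */
	static char sketch_buf[PAGE_SIZE] __aligned(PAGE_SIZE);

	static void sketch_ro_rw(void)
	{
		set_memory_ro((unsigned long) sketch_buf, 1);	/* 1 page read-only */
		/* any store into sketch_buf would now fault */
		set_memory_rw((unsigned long) sketch_buf, 1);	/* writable again   */
	}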
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d999249d357..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -15,6 +15,7 @@
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/quicklist.h> 17#include <linux/quicklist.h>
18#include <linux/rcupdate.h>
18 19
19#include <asm/system.h> 20#include <asm/system.h>
20#include <asm/pgtable.h> 21#include <asm/pgtable.h>
@@ -25,30 +26,10 @@
25 26
26#ifndef CONFIG_64BIT 27#ifndef CONFIG_64BIT
27#define ALLOC_ORDER 1 28#define ALLOC_ORDER 1
28#define TABLES_PER_PAGE 4 29#define FRAG_MASK 0x0f
29#define FRAG_MASK 15UL
30#define SECOND_HALVES 10UL
31
32void clear_table_pgstes(unsigned long *table)
33{
34 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
35 memset(table + 256, 0, PAGE_SIZE/4);
36 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
37 memset(table + 768, 0, PAGE_SIZE/4);
38}
39
40#else 30#else
41#define ALLOC_ORDER 2 31#define ALLOC_ORDER 2
42#define TABLES_PER_PAGE 2 32#define FRAG_MASK 0x03
43#define FRAG_MASK 3UL
44#define SECOND_HALVES 2UL
45
46void clear_table_pgstes(unsigned long *table)
47{
48 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
49 memset(table + 256, 0, PAGE_SIZE/2);
50}
51
52#endif 33#endif
53 34
54unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; 35unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -63,37 +44,17 @@ static int __init parse_vmalloc(char *arg)
63} 44}
64early_param("vmalloc", parse_vmalloc); 45early_param("vmalloc", parse_vmalloc);
65 46
66unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) 47unsigned long *crst_table_alloc(struct mm_struct *mm)
67{ 48{
68 struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 49 struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
69 50
70 if (!page) 51 if (!page)
71 return NULL; 52 return NULL;
72 page->index = 0;
73 if (noexec) {
74 struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
75 if (!shadow) {
76 __free_pages(page, ALLOC_ORDER);
77 return NULL;
78 }
79 page->index = page_to_phys(shadow);
80 }
81 spin_lock(&mm->context.list_lock);
82 list_add(&page->lru, &mm->context.crst_list);
83 spin_unlock(&mm->context.list_lock);
84 return (unsigned long *) page_to_phys(page); 53 return (unsigned long *) page_to_phys(page);
85} 54}
86 55
87void crst_table_free(struct mm_struct *mm, unsigned long *table) 56void crst_table_free(struct mm_struct *mm, unsigned long *table)
88{ 57{
89 unsigned long *shadow = get_shadow_table(table);
90 struct page *page = virt_to_page(table);
91
92 spin_lock(&mm->context.list_lock);
93 list_del(&page->lru);
94 spin_unlock(&mm->context.list_lock);
95 if (shadow)
96 free_pages((unsigned long) shadow, ALLOC_ORDER);
97 free_pages((unsigned long) table, ALLOC_ORDER); 58 free_pages((unsigned long) table, ALLOC_ORDER);
98} 59}
99 60
@@ -105,10 +66,10 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
105 66
106 BUG_ON(limit > (1UL << 53)); 67 BUG_ON(limit > (1UL << 53));
107repeat: 68repeat:
108 table = crst_table_alloc(mm, mm->context.noexec); 69 table = crst_table_alloc(mm);
109 if (!table) 70 if (!table)
110 return -ENOMEM; 71 return -ENOMEM;
111 spin_lock(&mm->page_table_lock); 72 spin_lock_bh(&mm->page_table_lock);
112 if (mm->context.asce_limit < limit) { 73 if (mm->context.asce_limit < limit) {
113 pgd = (unsigned long *) mm->pgd; 74 pgd = (unsigned long *) mm->pgd;
114 if (mm->context.asce_limit <= (1UL << 31)) { 75 if (mm->context.asce_limit <= (1UL << 31)) {
@@ -130,7 +91,7 @@ repeat:
130 mm->task_size = mm->context.asce_limit; 91 mm->task_size = mm->context.asce_limit;
131 table = NULL; 92 table = NULL;
132 } 93 }
133 spin_unlock(&mm->page_table_lock); 94 spin_unlock_bh(&mm->page_table_lock);
134 if (table) 95 if (table)
135 crst_table_free(mm, table); 96 crst_table_free(mm, table);
136 if (mm->context.asce_limit < limit) 97 if (mm->context.asce_limit < limit)
@@ -172,94 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
172} 133}
173#endif 134#endif
174 135
136static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
137{
138 unsigned int old, new;
139
140 do {
141 old = atomic_read(v);
142 new = old ^ bits;
143 } while (atomic_cmpxchg(v, old, new) != old);
144 return new;
145}
146
175/* 147/*
176 * page table entry allocation/free routines. 148 * page table entry allocation/free routines.
177 */ 149 */
150#ifdef CONFIG_PGSTE
151static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
152{
153 struct page *page;
154 unsigned long *table;
155
156 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
157 if (!page)
158 return NULL;
159 pgtable_page_ctor(page);
160 atomic_set(&page->_mapcount, 3);
161 table = (unsigned long *) page_to_phys(page);
162 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
163 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
164 return table;
165}
166
167static inline void page_table_free_pgste(unsigned long *table)
168{
169 struct page *page;
170
171 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
172 pgtable_page_ctor(page);
173 atomic_set(&page->_mapcount, -1);
174 __free_page(page);
175}
176#endif
177
178unsigned long *page_table_alloc(struct mm_struct *mm) 178unsigned long *page_table_alloc(struct mm_struct *mm)
179{ 179{
180 struct page *page; 180 struct page *page;
181 unsigned long *table; 181 unsigned long *table;
182 unsigned long bits; 182 unsigned int mask, bit;
183 183
184 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; 184#ifdef CONFIG_PGSTE
185 spin_lock(&mm->context.list_lock); 185 if (mm_has_pgste(mm))
186 page = NULL; 186 return page_table_alloc_pgste(mm);
187#endif
188 /* Allocate fragments of a 4K page as 1K/2K page table */
189 spin_lock_bh(&mm->context.list_lock);
190 mask = FRAG_MASK;
187 if (!list_empty(&mm->context.pgtable_list)) { 191 if (!list_empty(&mm->context.pgtable_list)) {
188 page = list_first_entry(&mm->context.pgtable_list, 192 page = list_first_entry(&mm->context.pgtable_list,
189 struct page, lru); 193 struct page, lru);
190 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) 194 table = (unsigned long *) page_to_phys(page);
191 page = NULL; 195 mask = atomic_read(&page->_mapcount);
196 mask = mask | (mask >> 4);
192 } 197 }
193 if (!page) { 198 if ((mask & FRAG_MASK) == FRAG_MASK) {
194 spin_unlock(&mm->context.list_lock); 199 spin_unlock_bh(&mm->context.list_lock);
195 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 200 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
196 if (!page) 201 if (!page)
197 return NULL; 202 return NULL;
198 pgtable_page_ctor(page); 203 pgtable_page_ctor(page);
199 page->flags &= ~FRAG_MASK; 204 atomic_set(&page->_mapcount, 1);
200 table = (unsigned long *) page_to_phys(page); 205 table = (unsigned long *) page_to_phys(page);
201 if (mm->context.has_pgste) 206 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
202 clear_table_pgstes(table); 207 spin_lock_bh(&mm->context.list_lock);
203 else
204 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
205 spin_lock(&mm->context.list_lock);
206 list_add(&page->lru, &mm->context.pgtable_list); 208 list_add(&page->lru, &mm->context.pgtable_list);
209 } else {
210 for (bit = 1; mask & bit; bit <<= 1)
211 table += PTRS_PER_PTE;
212 mask = atomic_xor_bits(&page->_mapcount, bit);
213 if ((mask & FRAG_MASK) == FRAG_MASK)
214 list_del(&page->lru);
207 } 215 }
208 table = (unsigned long *) page_to_phys(page); 216 spin_unlock_bh(&mm->context.list_lock);
209 while (page->flags & bits) {
210 table += 256;
211 bits <<= 1;
212 }
213 page->flags |= bits;
214 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
215 list_move_tail(&page->lru, &mm->context.pgtable_list);
216 spin_unlock(&mm->context.list_lock);
217 return table; 217 return table;
218} 218}
219 219
220void page_table_free(struct mm_struct *mm, unsigned long *table) 220void page_table_free(struct mm_struct *mm, unsigned long *table)
221{ 221{
222 struct page *page; 222 struct page *page;
223 unsigned long bits; 223 unsigned int bit, mask;
224 224
225 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; 225#ifdef CONFIG_PGSTE
226 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 226 if (mm_has_pgste(mm))
227 return page_table_free_pgste(table);
228#endif
229 /* Free 1K/2K page table fragment of a 4K page */
227 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 230 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
228 spin_lock(&mm->context.list_lock); 231 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
229 page->flags ^= bits; 232 spin_lock_bh(&mm->context.list_lock);
230 if (page->flags & FRAG_MASK) { 233 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
231 /* Page now has some free pgtable fragments. */
232 list_move(&page->lru, &mm->context.pgtable_list);
233 page = NULL;
234 } else
235 /* All fragments of the 4K page have been freed. */
236 list_del(&page->lru); 234 list_del(&page->lru);
237 spin_unlock(&mm->context.list_lock); 235 mask = atomic_xor_bits(&page->_mapcount, bit);
238 if (page) { 236 if (mask & FRAG_MASK)
237 list_add(&page->lru, &mm->context.pgtable_list);
238 spin_unlock_bh(&mm->context.list_lock);
239 if (mask == 0) {
239 pgtable_page_dtor(page); 240 pgtable_page_dtor(page);
241 atomic_set(&page->_mapcount, -1);
240 __free_page(page); 242 __free_page(page);
241 } 243 }
242} 244}
243 245
244void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) 246#ifdef CONFIG_HAVE_RCU_TABLE_FREE
247
248static void __page_table_free_rcu(void *table, unsigned bit)
245{ 249{
246 struct page *page; 250 struct page *page;
247 251
248 spin_lock(&mm->context.list_lock); 252#ifdef CONFIG_PGSTE
249 /* Free shadow region and segment tables. */ 253 if (bit == FRAG_MASK)
250 list_for_each_entry(page, &mm->context.crst_list, lru) 254 return page_table_free_pgste(table);
251 if (page->index) { 255#endif
252 free_pages((unsigned long) page->index, ALLOC_ORDER); 256 /* Free 1K/2K page table fragment of a 4K page */
253 page->index = 0; 257 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
254 } 258 if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
255 /* "Free" second halves of page tables. */ 259 pgtable_page_dtor(page);
256 list_for_each_entry(page, &mm->context.pgtable_list, lru) 260 atomic_set(&page->_mapcount, -1);
257 page->flags &= ~SECOND_HALVES; 261 __free_page(page);
258 spin_unlock(&mm->context.list_lock); 262 }
259 mm->context.noexec = 0;
260 update_mm(mm, tsk);
261} 263}
262 264
265void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
266{
267 struct mm_struct *mm;
268 struct page *page;
269 unsigned int bit, mask;
270
271 mm = tlb->mm;
272#ifdef CONFIG_PGSTE
273 if (mm_has_pgste(mm)) {
274 table = (unsigned long *) (__pa(table) | FRAG_MASK);
275 tlb_remove_table(tlb, table);
276 return;
277 }
278#endif
279 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
280 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
281 spin_lock_bh(&mm->context.list_lock);
282 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
283 list_del(&page->lru);
284 mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
285 if (mask & FRAG_MASK)
286 list_add_tail(&page->lru, &mm->context.pgtable_list);
287 spin_unlock_bh(&mm->context.list_lock);
288 table = (unsigned long *) (__pa(table) | (bit << 4));
289 tlb_remove_table(tlb, table);
290}
291
292void __tlb_remove_table(void *_table)
293{
294 void *table = (void *)((unsigned long) _table & PAGE_MASK);
295 unsigned type = (unsigned long) _table & ~PAGE_MASK;
296
297 if (type)
298 __page_table_free_rcu(table, type);
299 else
300 free_pages((unsigned long) table, ALLOC_ORDER);
301}
302
303#endif
304
263/* 305/*
264 * switch on pgstes for its userspace process (for kvm) 306 * switch on pgstes for its userspace process (for kvm)
265 */ 307 */
@@ -273,7 +315,7 @@ int s390_enable_sie(void)
273 return -EINVAL; 315 return -EINVAL;
274 316
275 /* Do we have pgstes? if yes, we are done */ 317 /* Do we have pgstes? if yes, we are done */
276 if (tsk->mm->context.has_pgste) 318 if (mm_has_pgste(tsk->mm))
277 return 0; 319 return 0;
278 320
279 /* lets check if we are allowed to replace the mm */ 321 /* lets check if we are allowed to replace the mm */
@@ -312,6 +354,8 @@ int s390_enable_sie(void)
312 tsk->mm = tsk->active_mm = mm; 354 tsk->mm = tsk->active_mm = mm;
313 preempt_disable(); 355 preempt_disable();
314 update_mm(mm, tsk); 356 update_mm(mm, tsk);
357 atomic_inc(&mm->context.attach_count);
358 atomic_dec(&old_mm->context.attach_count);
315 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); 359 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
316 preempt_enable(); 360 preempt_enable();
317 task_unlock(tsk); 361 task_unlock(tsk);
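page_table_alloc()/page_table_free() above track which 2 KB fragments (1 KB on 31-bit) of a 4 KB page hold page tables via bits in page->_mapcount, and page_table_free_rcu() encodes the fragment bit into the low bits of the pointer handed to tlb_remove_table() so that __tlb_remove_table() can distinguish fragments (type != 0) from full crst tables. A small sketch of that encoding for the 64-bit case; the helper names are illustrative.

	/* Sketch only, 64-bit: PTRS_PER_PTE * sizeof(pte_t) == 2 KB per fragment. */
	static unsigned int sketch_fragment_bit(unsigned long table)
	{
		/* offset 0x000 in the 4 KB page -> bit 0x1, offset 0x800 -> bit 0x2 */
		return 1 << ((table & ~PAGE_MASK) / (PTRS_PER_PTE * sizeof(pte_t)));
	}

	static void *sketch_rcu_encode(unsigned long table)
	{
		/* the bit is shifted by 4 so __tlb_remove_table() sees a nonzero type */
		return (void *) (table | (sketch_fragment_bit(table) << 4));
	}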
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 34c43f23b28c..8c1970d1dd91 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -95,7 +95,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
95 pu_dir = vmem_pud_alloc(); 95 pu_dir = vmem_pud_alloc();
96 if (!pu_dir) 96 if (!pu_dir)
97 goto out; 97 goto out;
98 pgd_populate_kernel(&init_mm, pg_dir, pu_dir); 98 pgd_populate(&init_mm, pg_dir, pu_dir);
99 } 99 }
100 100
101 pu_dir = pud_offset(pg_dir, address); 101 pu_dir = pud_offset(pg_dir, address);
@@ -103,7 +103,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
103 pm_dir = vmem_pmd_alloc(); 103 pm_dir = vmem_pmd_alloc();
104 if (!pm_dir) 104 if (!pm_dir)
105 goto out; 105 goto out;
106 pud_populate_kernel(&init_mm, pu_dir, pm_dir); 106 pud_populate(&init_mm, pu_dir, pm_dir);
107 } 107 }
108 108
109 pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); 109 pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
@@ -123,7 +123,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
123 pt_dir = vmem_pte_alloc(); 123 pt_dir = vmem_pte_alloc();
124 if (!pt_dir) 124 if (!pt_dir)
125 goto out; 125 goto out;
126 pmd_populate_kernel(&init_mm, pm_dir, pt_dir); 126 pmd_populate(&init_mm, pm_dir, pt_dir);
127 } 127 }
128 128
129 pt_dir = pte_offset_kernel(pm_dir, address); 129 pt_dir = pte_offset_kernel(pm_dir, address);
@@ -159,7 +159,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
159 continue; 159 continue;
160 160
161 if (pmd_huge(*pm_dir)) { 161 if (pmd_huge(*pm_dir)) {
162 pmd_clear_kernel(pm_dir); 162 pmd_clear(pm_dir);
163 address += HPAGE_SIZE - PAGE_SIZE; 163 address += HPAGE_SIZE - PAGE_SIZE;
164 continue; 164 continue;
165 } 165 }
@@ -192,7 +192,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
192 pu_dir = vmem_pud_alloc(); 192 pu_dir = vmem_pud_alloc();
193 if (!pu_dir) 193 if (!pu_dir)
194 goto out; 194 goto out;
195 pgd_populate_kernel(&init_mm, pg_dir, pu_dir); 195 pgd_populate(&init_mm, pg_dir, pu_dir);
196 } 196 }
197 197
198 pu_dir = pud_offset(pg_dir, address); 198 pu_dir = pud_offset(pg_dir, address);
@@ -200,7 +200,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
200 pm_dir = vmem_pmd_alloc(); 200 pm_dir = vmem_pmd_alloc();
201 if (!pm_dir) 201 if (!pm_dir)
202 goto out; 202 goto out;
203 pud_populate_kernel(&init_mm, pu_dir, pm_dir); 203 pud_populate(&init_mm, pu_dir, pm_dir);
204 } 204 }
205 205
206 pm_dir = pmd_offset(pu_dir, address); 206 pm_dir = pmd_offset(pu_dir, address);
@@ -208,7 +208,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
208 pt_dir = vmem_pte_alloc(); 208 pt_dir = vmem_pte_alloc();
209 if (!pt_dir) 209 if (!pt_dir)
210 goto out; 210 goto out;
211 pmd_populate_kernel(&init_mm, pm_dir, pt_dir); 211 pmd_populate(&init_mm, pm_dir, pt_dir);
212 } 212 }
213 213
214 pt_dir = pte_offset_kernel(pm_dir, address); 214 pt_dir = pte_offset_kernel(pm_dir, address);