Patched in Tegra support.

author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
commit: 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree: a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/s390/mm
parent: 406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
14 files changed, 271 insertions, 1124 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 640bea12303..d98fe9004a5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,9 +2,8 @@
 # Makefile for the linux s390-specific parts of the memory manager.
 #
-obj-y           := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
+obj-y    := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
-obj-y           += page-states.o gup.o extable.o pageattr.o
+            page-states.o gup.o
+obj-$(CONFIG_CMM) += cmm.o
-obj-$(CONFIG_CMM)               += cmm.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_HUGETLB_PAGE)      += hugetlbpage.o
+obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o
-obj-$(CONFIG_S390_PTDUMP)       += dump_pagetables.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 479e9428291..1f1dba9dcf5 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -1,7 +1,7 @@
 /*
 *  Collaborative memory management interface.
 *
- *    Copyright IBM Corp 2003, 2010
+ *    Copyright IBM Corp 2003,2010
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
 *
 */
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
deleted file mode 100644
index 04e4892247d..00000000000
--- a/arch/s390/mm/dump_pagetables.c
+++ /dev/null
@@ -1,231 +0,0 @@
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <asm/sections.h>
-#include <asm/pgtable.h>
-static unsigned long max_addr;
-struct addr_marker {
-        unsigned long start_address;
-        const char *name;
-};
-enum address_markers_idx {
-        IDENTITY_NR = 0,
-        KERNEL_START_NR,
-        KERNEL_END_NR,
-        VMEMMAP_NR,
-        VMALLOC_NR,
-#ifdef CONFIG_64BIT
-        MODULES_NR,
-#endif
-};
-static struct addr_marker address_markers[] = {
-        [IDENTITY_NR]     = {0, "Identity Mapping"},
-        [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
-        [KERNEL_END_NR]   = {(unsigned long)&_end, "Kernel Image End"},
-        [VMEMMAP_NR]      = {0, "vmemmap Area"},
-        [VMALLOC_NR]      = {0, "vmalloc Area"},
-#ifdef CONFIG_64BIT
-        [MODULES_NR]      = {0, "Modules Area"},
-#endif
-        { -1, NULL }
-};
-struct pg_state {
-        int level;
-        unsigned int current_prot;
-        unsigned long start_address;
-        unsigned long current_address;
-        const struct addr_marker *marker;
-};
-static void print_prot(struct seq_file *m, unsigned int pr, int level)
-{
-        static const char * const level_name[] =
-                { "ASCE", "PGD", "PUD", "PMD", "PTE" };
-        seq_printf(m, "%s ", level_name[level]);
-        if (pr & _PAGE_INVALID)
-                seq_printf(m, "I\n");
-        else
-                seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW");
-}
-static void note_page(struct seq_file *m, struct pg_state *st,
-                     unsigned int new_prot, int level)
-{
-        static const char units[] = "KMGTPE";
-        int width = sizeof(unsigned long) * 2;
-        const char *unit = units;
-        unsigned int prot, cur;
-        unsigned long delta;
-        /*
-         * If we have a "break" in the series, we need to flush the state
-         * that we have now. "break" is either changing perms, levels or
-         * address space marker.
-         */
-        prot = new_prot;
-        cur = st->current_prot;
-        if (!st->level) {
-                /* First entry */
-                st->current_prot = new_prot;
-                st->level = level;
-                st->marker = address_markers;
-                seq_printf(m, "---[ %s ]---\n", st->marker->name);
-        } else if (prot != cur || level != st->level ||
-                   st->current_address >= st->marker[1].start_address) {
-                /* Print the actual finished series */
-                seq_printf(m, "0x%0*lx-0x%0*lx",
-                           width, st->start_address,
-                           width, st->current_address);
-                delta = (st->current_address - st->start_address) >> 10;
-                while (!(delta & 0x3ff) && unit[1]) {
-                        delta >>= 10;
-                        unit++;
-                }
-                seq_printf(m, "%9lu%c ", delta, *unit);
-                print_prot(m, st->current_prot, st->level);
-                if (st->current_address >= st->marker[1].start_address) {
-                        st->marker++;
-                        seq_printf(m, "---[ %s ]---\n", st->marker->name);
-                }
-                st->start_address = st->current_address;
-                st->current_prot = new_prot;
-                st->level = level;
-        }
-}
-/*
- * The actual page table walker functions. In order to keep the implementation
- * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
- * flags to note_page() if a region, segment or page table entry is invalid or
- * read-only.
- * After all it's just a hint that the current level being walked contains an
- * invalid or read-only entry.
- */
-static void walk_pte_level(struct seq_file *m, struct pg_state *st,
-                           pmd_t *pmd, unsigned long addr)
-{
-        unsigned int prot;
-        pte_t *pte;
-        int i;
-        for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
-                st->current_address = addr;
-                pte = pte_offset_kernel(pmd, addr);
-                prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
-                note_page(m, st, prot, 4);
-                addr += PAGE_SIZE;
-        }
-}
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
-                           pud_t *pud, unsigned long addr)
-{
-        unsigned int prot;
-        pmd_t *pmd;
-        int i;
-        for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
-                st->current_address = addr;
-                pmd = pmd_offset(pud, addr);
-                if (!pmd_none(*pmd)) {
-                        if (pmd_large(*pmd)) {
-                                prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO;
-                                note_page(m, st, prot, 3);
-                        } else
-                                walk_pte_level(m, st, pmd, addr);
-                } else
-                        note_page(m, st, _PAGE_INVALID, 3);
-                addr += PMD_SIZE;
-        }
-}
-static void walk_pud_level(struct seq_file *m, struct pg_state *st,
-                           pgd_t *pgd, unsigned long addr)
-{
-        unsigned int prot;
-        pud_t *pud;
-        int i;
-        for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
-                st->current_address = addr;
-                pud = pud_offset(pgd, addr);
-                if (!pud_none(*pud))
-                        if (pud_large(*pud)) {
-                                prot = pud_val(*pud) & _PAGE_RO;
-                                note_page(m, st, prot, 2);
-                        } else
-                                walk_pmd_level(m, st, pud, addr);
-                else
-                        note_page(m, st, _PAGE_INVALID, 2);
-                addr += PUD_SIZE;
-        }
-}
-static void walk_pgd_level(struct seq_file *m)
-{
-        unsigned long addr = 0;
-        struct pg_state st;
-        pgd_t *pgd;
-        int i;
-        memset(&st, 0, sizeof(st));
-        for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
-                st.current_address = addr;
-                pgd = pgd_offset_k(addr);
-                if (!pgd_none(*pgd))
-                        walk_pud_level(m, &st, pgd, addr);
-                else
-                        note_page(m, &st, _PAGE_INVALID, 1);
-                addr += PGDIR_SIZE;
-        }
-        /* Flush out the last page */
-        st.current_address = max_addr;
-        note_page(m, &st, 0, 0);
-}
-static int ptdump_show(struct seq_file *m, void *v)
-{
-        walk_pgd_level(m);
-        return 0;
-}
-static int ptdump_open(struct inode *inode, struct file *filp)
-{
-        return single_open(filp, ptdump_show, NULL);
-}
-static const struct file_operations ptdump_fops = {
-        .open           = ptdump_open,
-        .read           = seq_read,
-        .llseek         = seq_lseek,
-        .release        = single_release,
-};
-static int pt_dump_init(void)
-{
-        /*
-         * Figure out the maximum virtual address being accessible with the
-         * kernel ASCE. We need this to keep the page table walker functions
-         * from accessing non-existent entries.
-         */
-#ifdef CONFIG_32BIT
-        max_addr = 1UL << 31;
-#else
-        max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
-        max_addr = 1UL << (max_addr * 11 + 31);
-        address_markers[MODULES_NR].start_address = MODULES_VADDR;
-#endif
-        address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
-        address_markers[VMALLOC_NR].start_address = VMALLOC_START;
-        debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
-        return 0;
-}
-device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
deleted file mode 100644
index 4d1ee88864e..00000000000
--- a/arch/s390/mm/extable.c
+++ /dev/null
@@ -1,81 +0,0 @@
-#include <linux/module.h>
-#include <linux/sort.h>
-#include <asm/uaccess.h>
-/*
- * Search one exception table for an entry corresponding to the
- * given instruction address, and return the address of the entry,
- * or NULL if none is found.
- * We use a binary search, and thus we assume that the table is
- * already sorted.
- */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-               const struct exception_table_entry *last,
-               unsigned long value)
-{
-        const struct exception_table_entry *mid;
-        unsigned long addr;
-        while (first <= last) {
-                mid = ((last - first) >> 1) + first;
-                addr = extable_insn(mid);
-                if (addr < value)
-                        first = mid + 1;
-                else if (addr > value)
-                        last = mid - 1;
-                else
-                        return mid;
-        }
-        return NULL;
-}
-/*
- * The exception table needs to be sorted so that the binary
- * search that we use to find entries in it works properly.
- * This is used both for the kernel exception table and for
- * the exception tables of modules that get loaded.
- *
- */
-static int cmp_ex(const void *a, const void *b)
-{
-        const struct exception_table_entry *x = a, *y = b;
-        /* This compare is only valid after normalization. */
-        return x->insn - y->insn;
-}
-void sort_extable(struct exception_table_entry *start,
-                  struct exception_table_entry *finish)
-{
-        struct exception_table_entry *p;
-        int i;
-        /* Normalize entries to being relative to the start of the section */
-        for (p = start, i = 0; p < finish; p++, i += 8)
-                p->insn += i;
-        sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
-        /* Denormalize all entries */
-        for (p = start, i = 0; p < finish; p++, i += 8)
-                p->insn -= i;
-}
-#ifdef CONFIG_MODULES
-/*
- * If the exception table is sorted, any referring to the module init
- * will be at the beginning or the end.
- */
-void trim_init_extable(struct module *m)
-{
-        /* Trim the beginning */
-        while (m->num_exentries &&
-               within_module_init(extable_insn(&m->extable[0]), m)) {
-                m->extable++;
-                m->num_exentries--;
-        }
-        /* Trim the end */
-        while (m->num_exentries &&
-               within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m))
-                m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 519bba716cc..075ddada491 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -1,9 +1,10 @@
 /*
+ * File...........: arch/s390/mm/extmem.c
 * Author(s)......: Carsten Otte <cotte@de.ibm.com>
 *                  Rob M van der Heij <rvdheij@nl.ibm.com>
 *                  Steven Shultz <shultzss@us.ibm.com>
 * Bugreports.to..: <Linux390@de.ibm.com>
- * Copyright IBM Corp. 2002, 2004
+ * (C) IBM Corporation 2002-2004
 */
 #define KMSG_COMPONENT "extmem"
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2fb9e63b8fc..9564fc779b2 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -1,6 +1,8 @@
 /*
+ *  arch/s390/mm/fault.c
+ *
 *  S390 version
- *    Copyright IBM Corp. 1999
+ *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *               Ulrich Weigand (uweigand@de.ibm.com)
 *
@@ -30,10 +32,11 @@
 #include <linux/uaccess.h>
 #include <linux/hugetlb.h>
 #include <asm/asm-offsets.h>
+#include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
-#include <asm/facility.h>
+#include <asm/compat.h>
 #include "../kernel/entry.h"
 #ifndef CONFIG_64BIT
@@ -49,19 +52,14 @@
 #define VM_FAULT_BADCONTEXT     0x010000
 #define VM_FAULT_BADMAP         0x020000
 #define VM_FAULT_BADACCESS      0x040000
-#define VM_FAULT_SIGNAL         0x080000
-static unsigned long store_indication __read_mostly;
+static unsigned long store_indication;
-#ifdef CONFIG_64BIT
+void fault_init(void)
-static int __init fault_init(void)
 {
-        if (test_facility(75))
+        if (test_facility(2) && test_facility(75))
                store_indication = 0xc00;
-        return 0;
 }
-early_initcall(fault_init);
-#endif
 static inline int notify_page_fault(struct pt_regs *regs)
 {
@@ -115,7 +113,7 @@ static inline int user_space_fault(unsigned long trans_exc_code)
        if (trans_exc_code == 2)
                /* Access via secondary space, set_fs setting decides */
                return current->thread.mm_segment.ar4;
-        if (s390_user_mode == HOME_SPACE_MODE)
+        if (user_mode == HOME_SPACE_MODE)
                /* User space if the access has been done via home space. */
                return trans_exc_code == 3;
        /*
@@ -127,7 +125,8 @@ static inline int user_space_fault(unsigned long trans_exc_code)
        return trans_exc_code != 3;
 }
-static inline void report_user_fault(struct pt_regs *regs, long signr)
+static inline void report_user_fault(struct pt_regs *regs, long int_code,
+                                     int signr, unsigned long address)
 {
        if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
                return;
@@ -135,12 +134,10 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
                return;
        if (!printk_ratelimit())
                return;
-        printk(KERN_ALERT "User process fault: interruption code 0x%X ",
+        printk("User process fault: interruption code 0x%lX ", int_code);
-               regs->int_code);
        print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
-        printk(KERN_CONT "\n");
+        printk("\n");
-        printk(KERN_ALERT "failing address: %lX\n",
+        printk("failing address: %lX\n", address);
-               regs->int_parm_long & __FAIL_ADDR_MASK);
        show_regs(regs);
 }
@@ -148,18 +145,24 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
 * Send SIGSEGV to task.  This is an external routine
 * to keep the stack usage of do_page_fault small.
 */
-static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
+static noinline void do_sigsegv(struct pt_regs *regs, long int_code,
+                                int si_code, unsigned long trans_exc_code)
 {
        struct siginfo si;
+        unsigned long address;
-        report_user_fault(regs, SIGSEGV);
+        address = trans_exc_code & __FAIL_ADDR_MASK;
+        current->thread.prot_addr = address;
+        current->thread.trap_no = int_code;
+        report_user_fault(regs, int_code, SIGSEGV, address);
        si.si_signo = SIGSEGV;
        si.si_code = si_code;
-        si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
+        si.si_addr = (void __user *) address;
        force_sig_info(SIGSEGV, &si, current);
 }
-static noinline void do_no_context(struct pt_regs *regs)
+static noinline void do_no_context(struct pt_regs *regs, long int_code,
+                                   unsigned long trans_exc_code)
 {
        const struct exception_table_entry *fixup;
        unsigned long address;
@@ -167,7 +170,7 @@ static noinline void do_no_context(struct pt_regs *regs)
        /* Are we prepared to handle this kernel fault?  */
        fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
        if (fixup) {
-                regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+                regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
                return;
        }
@@ -175,48 +178,55 @@ static noinline void do_no_context(struct pt_regs *regs)
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
-        address = regs->int_parm_long & __FAIL_ADDR_MASK;
+        address = trans_exc_code & __FAIL_ADDR_MASK;
-        if (!user_space_fault(regs->int_parm_long))
+        if (!user_space_fault(trans_exc_code))
                printk(KERN_ALERT "Unable to handle kernel pointer dereference"
                       " at virtual kernel address %p\n", (void *)address);
        else
                printk(KERN_ALERT "Unable to handle kernel paging request"
                       " at virtual user address %p\n", (void *)address);
-        die(regs, "Oops");
+        die("Oops", regs, int_code);
        do_exit(SIGKILL);
 }
-static noinline void do_low_address(struct pt_regs *regs)
+static noinline void do_low_address(struct pt_regs *regs, long int_code,
+                                    unsigned long trans_exc_code)
 {
        /* Low-address protection hit in kernel mode means
           NULL pointer write access in kernel mode.  */
        if (regs->psw.mask & PSW_MASK_PSTATE) {
                /* Low-address protection hit in user mode 'cannot happen'. */
-                die (regs, "Low-address protection");
+                die ("Low-address protection", regs, int_code);
                do_exit(SIGKILL);
        }
-        do_no_context(regs);
+        do_no_context(regs, int_code, trans_exc_code);
 }
-static noinline void do_sigbus(struct pt_regs *regs)
+static noinline void do_sigbus(struct pt_regs *regs, long int_code,
+                               unsigned long trans_exc_code)
 {
        struct task_struct *tsk = current;
+        unsigned long address;
        struct siginfo si;
        /*
         * Send a sigbus, regardless of whether we were in kernel
         * or user mode.
         */
+        address = trans_exc_code & __FAIL_ADDR_MASK;
+        tsk->thread.prot_addr = address;
+        tsk->thread.trap_no = int_code;
        si.si_signo = SIGBUS;
        si.si_errno = 0;
        si.si_code = BUS_ADRERR;
-        si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
+        si.si_addr = (void __user *) address;
        force_sig_info(SIGBUS, &si, tsk);
 }
-static noinline void do_fault_error(struct pt_regs *regs, int fault)
+static noinline void do_fault_error(struct pt_regs *regs, long int_code,
+                                    unsigned long trans_exc_code, int fault)
 {
        int si_code;
@@ -224,32 +234,28 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
        case VM_FAULT_BADACCESS:
        case VM_FAULT_BADMAP:
                /* Bad memory access. Check if it is kernel or user space. */
-                if (user_mode(regs)) {
+                if (regs->psw.mask & PSW_MASK_PSTATE) {
                        /* User mode accesses just cause a SIGSEGV */
                        si_code = (fault == VM_FAULT_BADMAP) ?
                                SEGV_MAPERR : SEGV_ACCERR;
-                        do_sigsegv(regs, si_code);
+                        do_sigsegv(regs, int_code, si_code, trans_exc_code);
                        return;
                }
        case VM_FAULT_BADCONTEXT:
-                do_no_context(regs);
+                do_no_context(regs, int_code, trans_exc_code);
-                break;
-        case VM_FAULT_SIGNAL:
-                if (!user_mode(regs))
-                        do_no_context(regs);
                break;
        default: /* fault & VM_FAULT_ERROR */
                if (fault & VM_FAULT_OOM) {
-                        if (!user_mode(regs))
+                        if (!(regs->psw.mask & PSW_MASK_PSTATE))
-                                do_no_context(regs);
+                                do_no_context(regs, int_code, trans_exc_code);
                        else
                                pagefault_out_of_memory();
                } else if (fault & VM_FAULT_SIGBUS) {
                        /* Kernel mode? Handle exceptions or die */
-                        if (!user_mode(regs))
+                        if (!(regs->psw.mask & PSW_MASK_PSTATE))
-                                do_no_context(regs);
+                                do_no_context(regs, int_code, trans_exc_code);
                        else
-                                do_sigbus(regs);
+                                do_sigbus(regs, int_code, trans_exc_code);
                } else
                        BUG();
                break;
@@ -267,28 +273,21 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
-static inline int do_exception(struct pt_regs *regs, int access)
+static inline int do_exception(struct pt_regs *regs, int access,
+                               unsigned long trans_exc_code)
 {
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
-        unsigned long trans_exc_code;
        unsigned long address;
        unsigned int flags;
        int fault;
-        tsk = current;
-        /*
-         * The instruction that caused the program check has
-         * been nullified. Don't signal single step via SIGTRAP.
-         */
-        clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
        if (notify_page_fault(regs))
                return 0;
+        tsk = current;
        mm = tsk->mm;
-        trans_exc_code = regs->int_parm_long;
        /*
         * Verify that the fault happened in user space, that
@@ -301,14 +300,14 @@ static inline int do_exception(struct pt_regs *regs, int access)
        address = trans_exc_code & __FAIL_ADDR_MASK;
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-        flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+        flags = FAULT_FLAG_ALLOW_RETRY;
        if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
                flags |= FAULT_FLAG_WRITE;
        down_read(&mm->mmap_sem);
 #ifdef CONFIG_PGSTE
-        if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+        if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
-                address = __gmap_fault(address,
+                address = gmap_fault(address,
                                     (struct gmap *) S390_lowcore.gmap);
                if (address == -EFAULT) {
                        fault = VM_FAULT_BADMAP;
@@ -350,11 +349,6 @@ retry:
         * the fault.
         */
        fault = handle_mm_fault(mm, vma, address, flags);
-        /* No reason to continue if interrupted by SIGKILL. */
-        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
-                fault = VM_FAULT_SIGNAL;
-                goto out;
-        }
        if (unlikely(fault & VM_FAULT_ERROR))
                goto out_up;
@@ -377,11 +371,15 @@ retry:
                        /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
                         * of starvation. */
                        flags &= ~FAULT_FLAG_ALLOW_RETRY;
-                        flags |= FAULT_FLAG_TRIED;
                        down_read(&mm->mmap_sem);
                        goto retry;
                }
        }
+        /*
+         * The instruction that caused the program check will
+         * be repeated. Don't signal single step via SIGTRAP.
+         */
+        clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
        fault = 0;
 out_up:
        up_read(&mm->mmap_sem);
@@ -389,52 +387,45 @@ out:
        return fault;
 }
-void __kprobes do_protection_exception(struct pt_regs *regs)
+void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code,
+                                       unsigned long trans_exc_code)
 {
-        unsigned long trans_exc_code;
        int fault;
-        trans_exc_code = regs->int_parm_long;
        /* Protection exception is suppressing, decrement psw address. */
-        regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+        regs->psw.addr -= (pgm_int_code >> 16);
        /*
         * Check for low-address protection.  This needs to be treated
         * as a special case because the translation exception code
         * field is not guaranteed to contain valid data in this case.
         */
        if (unlikely(!(trans_exc_code & 4))) {
-                do_low_address(regs);
+                do_low_address(regs, pgm_int_code, trans_exc_code);
                return;
        }
-        fault = do_exception(regs, VM_WRITE);
+        fault = do_exception(regs, VM_WRITE, trans_exc_code);
        if (unlikely(fault))
-                do_fault_error(regs, fault);
+                do_fault_error(regs, 4, trans_exc_code, fault);
 }
-void __kprobes do_dat_exception(struct pt_regs *regs)
+void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code,
+                                unsigned long trans_exc_code)
 {
        int access, fault;
        access = VM_READ | VM_EXEC | VM_WRITE;
-        fault = do_exception(regs, access);
+        fault = do_exception(regs, access, trans_exc_code);
        if (unlikely(fault))
-                do_fault_error(regs, fault);
+                do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault);
 }
 #ifdef CONFIG_64BIT
-void __kprobes do_asce_exception(struct pt_regs *regs)
+void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code,
+                                 unsigned long trans_exc_code)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
-        unsigned long trans_exc_code;
-        /*
-         * The instruction that caused the program check has
-         * been nullified. Don't signal single step via SIGTRAP.
-         */
-        clear_tsk_thread_flag(current, TIF_PER_TRAP);
-        trans_exc_code = regs->int_parm_long;
        if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
                goto no_context;
@@ -448,13 +439,13 @@ void __kprobes do_asce_exception(struct pt_regs *regs)
        }
        /* User mode accesses just cause a SIGSEGV */
-        if (user_mode(regs)) {
+        if (regs->psw.mask & PSW_MASK_PSTATE) {
-                do_sigsegv(regs, SEGV_MAPERR);
+                do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code);
                return;
        }
 no_context:
-        do_no_context(regs);
+        do_no_context(regs, pgm_int_code, trans_exc_code);
 }
 #endif
@@ -463,22 +454,20 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
        struct pt_regs regs;
        int access, fault;
-        /* Emulate a uaccess fault from kernel mode. */
+        regs.psw.mask = psw_kernel_bits;
-        regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
        if (!irqs_disabled())
                regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
        regs.psw.addr = (unsigned long) __builtin_return_address(0);
        regs.psw.addr |= PSW_ADDR_AMODE;
-        regs.int_code = pgm_int_code;
+        uaddr &= PAGE_MASK;
-        regs.int_parm_long = (uaddr & PAGE_MASK) | 2;
        access = write ? VM_WRITE : VM_READ;
-        fault = do_exception(&regs, access);
+        fault = do_exception(&regs, access, uaddr | 2);
-        /*
+        if (unlikely(fault)) {
-         * Since the fault happened in kernel mode while performing a uaccess
+                if (fault & VM_FAULT_OOM)
-         * all we need to do now is emulating a fixup in case "fault" is not
+                        return -EFAULT;
-         * zero.
+                else if (fault & VM_FAULT_SIGBUS)
-         * For the calling uaccess functions this results always in -EFAULT.
+                        do_sigbus(&regs, pgm_int_code, uaddr);
-         */
+        }
        return fault ? -EFAULT : 0;
 }
@@ -520,7 +509,7 @@ int pfault_init(void)
                .reserved = __PF_RES_FIELD };
        int rc;
-        if (pfault_disable)
+        if (!MACHINE_IS_VM || pfault_disable)
                return -1;
        asm volatile(
                "       diag    %1,%0,0x258\n"
@@ -541,7 +530,7 @@ void pfault_fini(void)
                .refversn = 2,
        };
-        if (pfault_disable)
+        if (!MACHINE_IS_VM || pfault_disable)
                return;
        asm volatile(
                "       diag    %0,0,0x258\n"
@@ -553,7 +542,7 @@ void pfault_fini(void)
 static DEFINE_SPINLOCK(pfault_lock);
 static LIST_HEAD(pfault_list);
-static void pfault_interrupt(struct ext_code ext_code,
+static void pfault_interrupt(unsigned int ext_int_code,
                             unsigned int param32, unsigned long param64)
 {
        struct task_struct *tsk;
@@ -566,19 +555,23 @@ static void pfault_interrupt(struct ext_code ext_code,
         * in the 'cpu address' field associated with the
         * external interrupt. 
         */
-        subcode = ext_code.subcode;
+        subcode = ext_int_code >> 16;
        if ((subcode & 0xff00) != __SUBCODE_MASK)
                return;
-        inc_irq_stat(IRQEXT_PFL);
+        kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
-        /* Get the token (= pid of the affected task). */
+        if (subcode & 0x0080) {
-        pid = sizeof(void *) == 4 ? param32 : param64;
+                /* Get the token (= pid of the affected task). */
-        rcu_read_lock();
+                pid = sizeof(void *) == 4 ? param32 : param64;
-        tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+                rcu_read_lock();
-        if (tsk)
+                tsk = find_task_by_pid_ns(pid, &init_pid_ns);
-                get_task_struct(tsk);
+                if (tsk)
-        rcu_read_unlock();
+                        get_task_struct(tsk);
-        if (!tsk)
+                rcu_read_unlock();
-                return;
+                if (!tsk)
+                        return;
+        } else {
+                tsk = current;
+        }
        spin_lock(&pfault_lock);
        if (subcode & 0x0080) {
                /* signal bit is set -> a page has been swapped in by VM */
@@ -591,47 +584,30 @@ static void pfault_interrupt(struct ext_code ext_code,
                        tsk->thread.pfault_wait = 0;
                        list_del(&tsk->thread.list);
                        wake_up_process(tsk);
-                        put_task_struct(tsk);
                } else {
                        /* Completion interrupt was faster than initial
                         * interrupt. Set pfault_wait to -1 so the initial
-                         * interrupt doesn't put the task to sleep.
+                         * interrupt doesn't put the task to sleep. */
-                         * If the task is not running, ignore the completion
+                        tsk->thread.pfault_wait = -1;
-                         * interrupt since it must be a leftover of a PFAULT
-                         * CANCEL operation which didn't remove all pending
-                         * completion interrupts. */
-                        if (tsk->state == TASK_RUNNING)
-                                tsk->thread.pfault_wait = -1;
                }
+                put_task_struct(tsk);
        } else {
                /* signal bit not set -> a real page is missing. */
-                if (WARN_ON_ONCE(tsk != current))
+                if (tsk->thread.pfault_wait == -1) {
-                        goto out;
-                if (tsk->thread.pfault_wait == 1) {
-                        /* Already on the list with a reference: put to sleep */
-                        __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-                        set_tsk_need_resched(tsk);
-                } else if (tsk->thread.pfault_wait == -1) {
                        /* Completion interrupt was faster than the initial
                         * interrupt (pfault_wait == -1). Set pfault_wait
                         * back to zero and exit. */
                        tsk->thread.pfault_wait = 0;
                } else {
                        /* Initial interrupt arrived before completion
-                         * interrupt. Let the task sleep.
+                         * interrupt. Let the task sleep. */
-                         * An extra task reference is needed since a different
-                         * cpu may set the task state to TASK_RUNNING again
-                         * before the scheduler is reached. */
-                        get_task_struct(tsk);
                        tsk->thread.pfault_wait = 1;
                        list_add(&tsk->thread.list, &pfault_list);
-                        __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+                        set_task_state(tsk, TASK_UNINTERRUPTIBLE);
                        set_tsk_need_resched(tsk);
                }
        }
-out:
        spin_unlock(&pfault_lock);
-        put_task_struct(tsk);
 }
 static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
@@ -640,15 +616,15 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
        struct thread_struct *thread, *next;
        struct task_struct *tsk;
-        switch (action & ~CPU_TASKS_FROZEN) {
+        switch (action) {
        case CPU_DEAD:
+        case CPU_DEAD_FROZEN:
                spin_lock_irq(&pfault_lock);
                list_for_each_entry_safe(thread, next, &pfault_list, list) {
                        thread->pfault_wait = 0;
                        list_del(&thread->list);
                        tsk = container_of(thread, struct task_struct, thread);
                        wake_up_process(tsk);
-                        put_task_struct(tsk);
                }
                spin_unlock_irq(&pfault_lock);
                break;
@@ -662,6 +638,8 @@ static int __init pfault_irq_init(void)
 {
        int rc;
+        if (!MACHINE_IS_VM)
+                return 0;
        rc = register_external_interrupt(0x2603, pfault_interrupt);
        if (rc)
                goto out_extint;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1f5315d1215..65cb06e2af4 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -115,18 +115,9 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
                pmd = *pmdp;
                barrier();
                next = pmd_addr_end(addr, end);
-                /*
+                if (pmd_none(pmd))
-                 * The pmd_trans_splitting() check below explains why
-                 * pmdp_splitting_flush() has to serialize with
-                 * smp_call_function() against our disabled IRQs, to stop
-                 * this gup-fast code from running while we set the
-                 * splitting bit in the pmd. Returning zero will take
-                 * the slow path that will call wait_split_huge_page()
-                 * if the pmd is still in splitting state.
-                 */
-                if (pmd_none(pmd) || pmd_trans_splitting(pmd))
                        return 0;
-                if (unlikely(pmd_large(pmd))) {
+                if (unlikely(pmd_huge(pmd))) {
                        if (!gup_huge_pmd(pmdp, pmd, addr, next,
                                          write, pages, nr))
                                return 0;
@@ -163,42 +154,6 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
        return 1;
 }
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-                          struct page **pages)
-{
-        struct mm_struct *mm = current->mm;
-        unsigned long addr, len, end;
-        unsigned long next, flags;
-        pgd_t *pgdp, pgd;
-        int nr = 0;
-        start &= PAGE_MASK;
-        addr = start;
-        len = (unsigned long) nr_pages << PAGE_SHIFT;
-        end = start + len;
-        if ((end < start) || (end > TASK_SIZE))
-                return 0;
-        local_irq_save(flags);
-        pgdp = pgd_offset(mm, addr);
-        do {
-                pgd = *pgdp;
-                barrier();
-                next = pgd_addr_end(addr, end);
-                if (pgd_none(pgd))
-                        break;
-                if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
-                        break;
-        } while (pgdp++, addr = next, addr != end);
-        local_irq_restore(flags);
-        return nr;
-}
 /**
 * get_user_pages_fast() - pin user pages in memory
 * @start:      starting user address
@@ -228,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;
-        if ((end < start) || (end > TASK_SIZE))
+        if (end < start)
                goto slow_irqon;
        /*
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 532525ec88c..597bb2d27c3 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -1,7 +1,7 @@
 /*
 *  IBM System z Huge TLB Page Support for Kernel.
 *
- *    Copyright IBM Corp. 2007
+ *    Copyright 2007 IBM Corp.
 *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */
@@ -58,8 +58,6 @@ void arch_release_hugepage(struct page *page)
        ptep = (pte_t *) page[1].index;
        if (!ptep)
                return;
-        clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
-                    PTRS_PER_PTE * sizeof(pte_t));
        page_table_free(&init_mm, (unsigned long *) ptep);
        page[1].index = 0;
 }
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ae672f41c46..59b663109d9 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -1,6 +1,8 @@
 /*
+ *  arch/s390/mm/init.c
+ *
 *  S390 version
- *    Copyright IBM Corp. 1999
+ *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *
 *  Derived from "arch/i386/mm/init.c"
@@ -24,9 +26,9 @@
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/initrd.h>
-#include <linux/export.h>
 #include <linux/gfp.h>
 #include <asm/processor.h>
+#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -35,14 +37,13 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
-#include <asm/ctl_reg.h>
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
-static unsigned long __init setup_zero_pages(void)
+static unsigned long setup_zero_pages(void)
 {
        struct cpuid cpu_id;
        unsigned int order;
@@ -91,22 +92,18 @@ static unsigned long __init setup_zero_pages(void)
 void __init paging_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES];
-        unsigned long pgd_type, asce_bits;
+        unsigned long pgd_type;
        init_mm.pgd = swapper_pg_dir;
+        S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
 #ifdef CONFIG_64BIT
-        if (VMALLOC_END > (1UL << 42)) {
+        /* A three level page table (4TB) is enough for the kernel space. */
-                asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+        S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
-                pgd_type = _REGION2_ENTRY_EMPTY;
+        pgd_type = _REGION3_ENTRY_EMPTY;
-        } else {
-                asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
-                pgd_type = _REGION3_ENTRY_EMPTY;
-        }
 #else
-        asce_bits = _ASCE_TABLE_LENGTH;
+        S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH;
        pgd_type = _SEGMENT_ENTRY_EMPTY;
 #endif
-        S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
        clear_table((unsigned long *) init_mm.pgd, pgd_type,
                    sizeof(unsigned long)*2048);
        vmem_map_init();
@@ -125,6 +122,7 @@ void __init paging_init(void)
        max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
        max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
        free_area_init_nodes(max_zone_pfns);
+        fault_init();
 }
 void __init mem_init(void)
@@ -158,6 +156,34 @@ void __init mem_init(void)
               PFN_ALIGN((unsigned long)&_eshared) - 1);
 }
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        pte_t *pte;
+        unsigned long address;
+        int i;
+        for (i = 0; i < numpages; i++) {
+                address = page_to_phys(page + i);
+                pgd = pgd_offset_k(address);
+                pud = pud_offset(pgd, address);
+                pmd = pmd_offset(pud, address);
+                pte = pte_offset_kernel(pmd, address);
+                if (!enable) {
+                        __ptep_ipte(address, pte);
+                        pte_val(*pte) = _PAGE_TYPE_EMPTY;
+                        continue;
+                }
+                *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
+                /* Flush cpu write queue. */
+                mb();
+        }
+}
+#endif
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
        unsigned long addr = begin;
@@ -183,7 +209,7 @@ void free_initmem(void)
 }
 #ifdef CONFIG_BLK_DEV_INITRD
-void __init free_initrd_mem(unsigned long start, unsigned long end)
+void free_initrd_mem(unsigned long start, unsigned long end)
 {
        free_init_pages("initrd memory", start, end);
 }
@@ -192,38 +218,16 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
-        unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+        struct pglist_data *pgdat;
-        unsigned long start_pfn = PFN_DOWN(start);
-        unsigned long size_pages = PFN_DOWN(size);
        struct zone *zone;
        int rc;
+        pgdat = NODE_DATA(nid);
+        zone = pgdat->node_zones + ZONE_MOVABLE;
        rc = vmem_add_mapping(start, size);
        if (rc)
                return rc;
-        for_each_zone(zone) {
+        rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size));
-                if (zone_idx(zone) != ZONE_MOVABLE) {
-                        /* Add range within existing zone limits */
-                        zone_start_pfn = zone->zone_start_pfn;
-                        zone_end_pfn = zone->zone_start_pfn +
-                                       zone->spanned_pages;
-                } else {
-                        /* Add remaining range to ZONE_MOVABLE */
-                        zone_start_pfn = start_pfn;
-                        zone_end_pfn = start_pfn + size_pages;
-                }
-                if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-                        continue;
-                nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-                           zone_end_pfn - start_pfn : size_pages;
-                rc = __add_pages(nid, zone, start_pfn, nr_pages);
-                if (rc)
-                        break;
-                start_pfn += nr_pages;
-                size_pages -= nr_pages;
-                if (!size_pages)
-                        break;
-        }
        if (rc)
                vmem_remove_mapping(start, size);
        return rc;
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 921fa541dc0..5dbbaa6e594 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -11,9 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/gfp.h>
+#include <asm/system.h>
-#include <linux/cpu.h>
-#include <asm/ctl_reg.h>
 /*
 * This function writes to kernel memory bypassing DAT and possible
@@ -62,14 +60,18 @@ long probe_kernel_write(void *dst, const void *src, size_t size)
        return copied < 0 ? -EFAULT : 0;
 }
-static int __memcpy_real(void *dest, void *src, size_t count)
+int memcpy_real(void *dest, void *src, size_t count)
 {
        register unsigned long _dest asm("2") = (unsigned long) dest;
        register unsigned long _len1 asm("3") = (unsigned long) count;
        register unsigned long _src  asm("4") = (unsigned long) src;
        register unsigned long _len2 asm("5") = (unsigned long) count;
+        unsigned long flags;
        int rc = -EFAULT;
+        if (!count)
+                return 0;
+        flags = __arch_local_irq_stnsm(0xf8UL);
        asm volatile (
                "0:     mvcle   %1,%2,0x0\n"
                "1:     jo      0b\n"
@@ -80,150 +82,22 @@ static int __memcpy_real(void *dest, void *src, size_t count)
                  "+d" (_len2), "=m" (*((long *) dest))
                : "m" (*((long *) src))
                : "cc", "memory");
+        arch_local_irq_restore(flags);
        return rc;
 }
 /*
- * Copy memory in real mode (kernel to kernel)
+ * Copy memory to absolute zero
- */
-int memcpy_real(void *dest, void *src, size_t count)
-{
-        unsigned long flags;
-        int rc;
-        if (!count)
-                return 0;
-        local_irq_save(flags);
-        __arch_local_irq_stnsm(0xfbUL);
-        rc = __memcpy_real(dest, src, count);
-        local_irq_restore(flags);
-        return rc;
-}
-/*
- * Copy memory in absolute mode (kernel to kernel)
 */
-void memcpy_absolute(void *dest, void *src, size_t count)
+void copy_to_absolute_zero(void *dest, void *src, size_t count)
 {
-        unsigned long cr0, flags, prefix;
+        unsigned long cr0;
-        flags = arch_local_irq_save();
+        BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore));
+        preempt_disable();
        __ctl_store(cr0, 0, 0);
        __ctl_clear_bit(0, 28); /* disable lowcore protection */
-        prefix = store_prefix();
+        memcpy_real(dest + store_prefix(), src, count);
-        if (prefix) {
-                local_mcck_disable();
-                set_prefix(0);
-                memcpy(dest, src, count);
-                set_prefix(prefix);
-                local_mcck_enable();
-        } else {
-                memcpy(dest, src, count);
-        }
        __ctl_load(cr0, 0, 0);
-        arch_local_irq_restore(flags);
-}
-/*
- * Copy memory from kernel (real) to user (virtual)
- */
-int copy_to_user_real(void __user *dest, void *src, size_t count)
-{
-        int offs = 0, size, rc;
-        char *buf;
-        buf = (char *) __get_free_page(GFP_KERNEL);
-        if (!buf)
-                return -ENOMEM;
-        rc = -EFAULT;
-        while (offs < count) {
-                size = min(PAGE_SIZE, count - offs);
-                if (memcpy_real(buf, src + offs, size))
-                        goto out;
-                if (copy_to_user(dest + offs, buf, size))
-                        goto out;
-                offs += size;
-        }
-        rc = 0;
-out:
-        free_page((unsigned long) buf);
-        return rc;
-}
-/*
- * Copy memory from user (virtual) to kernel (real)
- */
-int copy_from_user_real(void *dest, void __user *src, size_t count)
-{
-        int offs = 0, size, rc;
-        char *buf;
-        buf = (char *) __get_free_page(GFP_KERNEL);
-        if (!buf)
-                return -ENOMEM;
-        rc = -EFAULT;
-        while (offs < count) {
-                size = min(PAGE_SIZE, count - offs);
-                if (copy_from_user(buf, src + offs, size))
-                        goto out;
-                if (memcpy_real(dest + offs, buf, size))
-                        goto out;
-                offs += size;
-        }
-        rc = 0;
-out:
-        free_page((unsigned long) buf);
-        return rc;
-}
-/*
- * Check if physical address is within prefix or zero page
- */
-static int is_swapped(unsigned long addr)
-{
-        unsigned long lc;
-        int cpu;
-        if (addr < sizeof(struct _lowcore))
-                return 1;
-        for_each_online_cpu(cpu) {
-                lc = (unsigned long) lowcore_ptr[cpu];
-                if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc)
-                        continue;
-                return 1;
-        }
-        return 0;
-}
-/*
- * Convert a physical pointer for /dev/mem access
- *
- * For swapped prefix pages a new buffer is returned that contains a copy of
- * the absolute memory. The buffer size is maximum one page large.
- */
-void *xlate_dev_mem_ptr(unsigned long addr)
-{
-        void *bounce = (void *) addr;
-        unsigned long size;
-        get_online_cpus();
-        preempt_disable();
-        if (is_swapped(addr)) {
-                size = PAGE_SIZE - (addr & ~PAGE_MASK);
-                bounce = (void *) __get_free_page(GFP_ATOMIC);
-                if (bounce)
-                        memcpy_absolute(bounce, (void *) addr, size);
-        }
        preempt_enable();
-        put_online_cpus();
-        return bounce;
-}
-/*
- * Free converted buffer for /dev/mem access (if necessary)
- */
-void unxlate_dev_mem_ptr(unsigned long addr, void *buf)
-{
-        if ((void *) addr != buf)
-                free_page((unsigned long) buf);
 }
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index c59a5efa58b..c9a9f7f1818 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -1,4 +1,6 @@
 /*
+ *  linux/arch/s390/mm/mmap.c
+ *
 *  flexible mmap layout support
 *
 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
@@ -24,11 +26,10 @@
 #include <linux/personality.h>
 #include <linux/mm.h>
-#include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/random.h>
-#include <linux/compat.h>
 #include <asm/pgalloc.h>
+#include <asm/compat.h>
 static unsigned long stack_maxrandom_size(void)
 {
@@ -98,20 +99,15 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
                mm->unmap_area = arch_unmap_area_topdown;
        }
 }
+EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
 #else
 int s390_mmap_check(unsigned long addr, unsigned long len)
 {
-        int rc;
        if (!is_compat_task() &&
-            len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) {
+            len >= TASK_SIZE && TASK_SIZE < (1UL << 53))
-                rc = crst_table_upgrade(current->mm, 1UL << 53);
+                return crst_table_upgrade(current->mm, 1UL << 53);
-                if (rc)
-                        return rc;
-                update_mm(current->mm, current);
-        }
        return 0;
 }
@@ -131,7 +127,6 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
                rc = crst_table_upgrade(mm, 1UL << 53);
                if (rc)
                        return (unsigned long) rc;
-                update_mm(mm, current);
                area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
        }
        return area;
@@ -154,7 +149,6 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
                rc = crst_table_upgrade(mm, 1UL << 53);
                if (rc)
                        return (unsigned long) rc;
-                update_mm(mm, current);
                area = arch_get_unmapped_area_topdown(filp, addr, len,
                                                      pgoff, flags);
        }
@@ -180,5 +174,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
                mm->unmap_area = arch_unmap_area_topdown;
        }
 }
+EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
 #endif
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 29ccee3651f..d013ed39743 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -2,79 +2,30 @@
 * Copyright IBM Corp. 2011
 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
 */
-#include <linux/hugetlb.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <asm/cacheflush.h>
+#include <linux/hugetlb.h>
 #include <asm/pgtable.h>
-#include <asm/page.h>
-void storage_key_init_range(unsigned long start, unsigned long end)
-{
-        unsigned long boundary, function, size;
-        while (start < end) {
-                if (MACHINE_HAS_EDAT2) {
-                        /* set storage keys for a 2GB frame */
-                        function = 0x22000 | PAGE_DEFAULT_KEY;
-                        size = 1UL << 31;
-                        boundary = (start + size) & ~(size - 1);
-                        if (boundary <= end) {
-                                do {
-                                        start = pfmf(function, start);
-                                } while (start < boundary);
-                                continue;
-                        }
-                }
-                if (MACHINE_HAS_EDAT1) {
-                        /* set storage keys for a 1MB frame */
-                        function = 0x21000 | PAGE_DEFAULT_KEY;
-                        size = 1UL << 20;
-                        boundary = (start + size) & ~(size - 1);
-                        if (boundary <= end) {
-                                do {
-                                        start = pfmf(function, start);
-                                } while (start < boundary);
-                                continue;
-                        }
-                }
-                page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
-                start += PAGE_SIZE;
-        }
-}
-static pte_t *walk_page_table(unsigned long addr)
-{
-        pgd_t *pgdp;
-        pud_t *pudp;
-        pmd_t *pmdp;
-        pte_t *ptep;
-        pgdp = pgd_offset_k(addr);
-        if (pgd_none(*pgdp))
-                return NULL;
-        pudp = pud_offset(pgdp, addr);
-        if (pud_none(*pudp) || pud_large(*pudp))
-                return NULL;
-        pmdp = pmd_offset(pudp, addr);
-        if (pmd_none(*pmdp) || pmd_large(*pmdp))
-                return NULL;
-        ptep = pte_offset_kernel(pmdp, addr);
-        if (pte_none(*ptep))
-                return NULL;
-        return ptep;
-}
 static void change_page_attr(unsigned long addr, int numpages,
                             pte_t (*set) (pte_t))
 {
        pte_t *ptep, pte;
+        pmd_t *pmdp;
+        pud_t *pudp;
+        pgd_t *pgdp;
        int i;
        for (i = 0; i < numpages; i++) {
-                ptep = walk_page_table(addr);
+                pgdp = pgd_offset(&init_mm, addr);
-                if (WARN_ON_ONCE(!ptep))
+                pudp = pud_offset(pgdp, addr);
-                        break;
+                pmdp = pmd_offset(pudp, addr);
+                if (pmd_huge(*pmdp)) {
+                        WARN_ON_ONCE(1);
+                        continue;
+                }
+                ptep = pte_offset_kernel(pmdp, addr);
                pte = *ptep;
                pte = set(pte);
                __ptep_ipte(addr, ptep);
@@ -88,63 +39,23 @@ int set_memory_ro(unsigned long addr, int numpages)
        change_page_attr(addr, numpages, pte_wrprotect);
        return 0;
 }
+EXPORT_SYMBOL_GPL(set_memory_ro);
 int set_memory_rw(unsigned long addr, int numpages)
 {
        change_page_attr(addr, numpages, pte_mkwrite);
        return 0;
 }
+EXPORT_SYMBOL_GPL(set_memory_rw);
 /* not possible */
 int set_memory_nx(unsigned long addr, int numpages)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(set_memory_nx);
 int set_memory_x(unsigned long addr, int numpages)
 {
        return 0;
 }
-#ifdef CONFIG_DEBUG_PAGEALLOC
-void kernel_map_pages(struct page *page, int numpages, int enable)
-{
-        unsigned long address;
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *pte;
-        int i;
-        for (i = 0; i < numpages; i++) {
-                address = page_to_phys(page + i);
-                pgd = pgd_offset_k(address);
-                pud = pud_offset(pgd, address);
-                pmd = pmd_offset(pud, address);
-                pte = pte_offset_kernel(pmd, address);
-                if (!enable) {
-                        __ptep_ipte(address, pte);
-                        pte_val(*pte) = _PAGE_TYPE_EMPTY;
-                        continue;
-                }
-                *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
-        }
-}
-#ifdef CONFIG_HIBERNATION
-bool kernel_page_present(struct page *page)
-{
-        unsigned long addr;
-        int cc;
-        addr = page_to_phys(page);
-        asm volatile(
-                "       lra     %1,0(%1)\n"
-                "       ipm     %0\n"
-                "       srl     %0,28"
-                : "=d" (cc), "+a" (addr) : : "cc");
-        return cc == 0;
-}
-#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ae44d2a3431..529a0883837 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,5 +1,5 @@
 /*
- *    Copyright IBM Corp. 2007, 2011
+ *    Copyright IBM Corp. 2007,2009
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
@@ -18,6 +18,7 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
+#include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
@@ -32,6 +33,17 @@
 #define FRAG_MASK       0x03
 #endif
+unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
+EXPORT_SYMBOL(VMALLOC_START);
+static int __init parse_vmalloc(char *arg)
+{
+        if (!arg)
+                return -EINVAL;
+        VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
+        return 0;
+}
+early_param("vmalloc", parse_vmalloc);
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
@@ -85,6 +97,7 @@ repeat:
                crst_table_free(mm, table);
        if (mm->context.asce_limit < limit)
                goto repeat;
+        update_mm(mm, current);
        return 0;
 }
@@ -92,6 +105,9 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 {
        pgd_t *pgd;
+        if (mm->context.asce_limit <= limit)
+                return;
+        __tlb_flush_mm(mm);
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
@@ -114,6 +130,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
                mm->task_size = mm->context.asce_limit;
                crst_table_free(mm, (unsigned long *) pgd);
        }
+        update_mm(mm, current);
 }
 #endif
@@ -205,7 +222,6 @@ void gmap_free(struct gmap *gmap)
        /* Free all segment & region tables. */
        down_read(&gmap->mm->mmap_sem);
-        spin_lock(&gmap->mm->page_table_lock);
        list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
                table = (unsigned long *) page_to_phys(page);
                if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
@@ -214,7 +230,6 @@ void gmap_free(struct gmap *gmap)
                                gmap_unlink_segment(gmap, table);
                __free_pages(page, ALLOC_ORDER);
        }
-        spin_unlock(&gmap->mm->page_table_lock);
        up_read(&gmap->mm->mmap_sem);
        list_del(&gmap->list);
        kfree(gmap);
@@ -241,29 +256,25 @@ void gmap_disable(struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_disable);
-/*
- * gmap_alloc_table is assumed to be called with mmap_sem held
- */
 static int gmap_alloc_table(struct gmap *gmap,
                               unsigned long *table, unsigned long init)
 {
        struct page *page;
        unsigned long *new;
-        /* since we dont free the gmap table until gmap_free we can unlock */
-        spin_unlock(&gmap->mm->page_table_lock);
        page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
-        spin_lock(&gmap->mm->page_table_lock);
        if (!page)
                return -ENOMEM;
        new = (unsigned long *) page_to_phys(page);
        crst_table_init(new, init);
+        down_read(&gmap->mm->mmap_sem);
        if (*table & _REGION_ENTRY_INV) {
                list_add(&page->lru, &gmap->crst_list);
                *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
                        (*table & _REGION_ENTRY_TYPE_MASK);
        } else
                __free_pages(page, ALLOC_ORDER);
+        up_read(&gmap->mm->mmap_sem);
        return 0;
 }
@@ -288,7 +299,6 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
        flush = 0;
        down_read(&gmap->mm->mmap_sem);
-        spin_lock(&gmap->mm->page_table_lock);
        for (off = 0; off < len; off += PMD_SIZE) {
                /* Walk the guest addr space page table */
                table = gmap->table + (((to + off) >> 53) & 0x7ff);
@@ -310,7 +320,6 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
                *table = _SEGMENT_ENTRY_INV;
        }
 out:
-        spin_unlock(&gmap->mm->page_table_lock);
        up_read(&gmap->mm->mmap_sem);
        if (flush)
                gmap_flush_tlb(gmap);
@@ -341,7 +350,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
        flush = 0;
        down_read(&gmap->mm->mmap_sem);
-        spin_lock(&gmap->mm->page_table_lock);
        for (off = 0; off < len; off += PMD_SIZE) {
                /* Walk the gmap address space page table */
                table = gmap->table + (((to + off) >> 53) & 0x7ff);
@@ -365,24 +373,19 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
                flush |= gmap_unlink_segment(gmap, table);
                *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
        }
-        spin_unlock(&gmap->mm->page_table_lock);
        up_read(&gmap->mm->mmap_sem);
        if (flush)
                gmap_flush_tlb(gmap);
        return 0;
 out_unmap:
-        spin_unlock(&gmap->mm->page_table_lock);
        up_read(&gmap->mm->mmap_sem);
        gmap_unmap_segment(gmap, to, len);
        return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(gmap_map_segment);
-/*
+unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
- * this function is assumed to be called with mmap_sem held
- */
-unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 {
        unsigned long *table, vmaddr, segment;
        struct mm_struct *mm;
@@ -442,75 +445,16 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
                page = pmd_page(*pmd);
                mp = (struct gmap_pgtable *) page->index;
                rmap->entry = table;
-                spin_lock(&mm->page_table_lock);
                list_add(&rmap->list, &mp->mapper);
-                spin_unlock(&mm->page_table_lock);
                /* Set gmap segment table entry to page table. */
                *table = pmd_val(*pmd) & PAGE_MASK;
                return vmaddr | (address & ~PMD_MASK);
        }
        return -EFAULT;
-}
-unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
-{
-        unsigned long rc;
-        down_read(&gmap->mm->mmap_sem);
-        rc = __gmap_fault(address, gmap);
-        up_read(&gmap->mm->mmap_sem);
-        return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_fault);
-void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
-{
-        unsigned long *table, address, size;
-        struct vm_area_struct *vma;
-        struct gmap_pgtable *mp;
-        struct page *page;
-        down_read(&gmap->mm->mmap_sem);
-        address = from;
-        while (address < to) {
-                /* Walk the gmap address space page table */
-                table = gmap->table + ((address >> 53) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
-                        address = (address + PMD_SIZE) & PMD_MASK;
-                        continue;
-                }
-                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-                table = table + ((address >> 42) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
-                        address = (address + PMD_SIZE) & PMD_MASK;
-                        continue;
-                }
-                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-                table = table + ((address >> 31) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
-                        address = (address + PMD_SIZE) & PMD_MASK;
-                        continue;
-                }
-                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-                table = table + ((address >> 20) & 0x7ff);
-                if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
-                        address = (address + PMD_SIZE) & PMD_MASK;
-                        continue;
-                }
-                page = pfn_to_page(*table >> PAGE_SHIFT);
-                mp = (struct gmap_pgtable *) page->index;
-                vma = find_vma(gmap->mm, mp->vmaddr);
-                size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
-                zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
-                               size, NULL);
-                address = (address + PMD_SIZE) & PMD_MASK;
-        }
-        up_read(&gmap->mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
 void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
 {
        struct gmap_rmap *rmap, *next;
@@ -568,7 +512,7 @@ static inline void page_table_free_pgste(unsigned long *table)
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        mp = (struct gmap_pgtable *) page->index;
        BUG_ON(!list_empty(&mp->mapper));
-        pgtable_page_dtor(page);
+        pgtable_page_ctor(page);
        atomic_set(&page->_mapcount, -1);
        kfree(mp);
        __free_page(page);
@@ -609,8 +553,8 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
 */
 unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
 {
-        unsigned long *uninitialized_var(table);
+        struct page *page;
-        struct page *uninitialized_var(page);
+        unsigned long *table;
        unsigned int mask, bit;
        if (mm_has_pgste(mm))
@@ -673,6 +617,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
        }
 }
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 static void __page_table_free_rcu(void *table, unsigned bit)
 {
        struct page *page;
@@ -726,90 +672,7 @@ void __tlb_remove_table(void *_table)
                free_pages((unsigned long) table, ALLOC_ORDER);
 }
-static void tlb_remove_table_smp_sync(void *arg)
+#endif
-{
-        /* Simply deliver the interrupt */
-}
-static void tlb_remove_table_one(void *table)
-{
-        /*
-         * This isn't an RCU grace period and hence the page-tables cannot be
-         * assumed to be actually RCU-freed.
-         *
-         * It is however sufficient for software page-table walkers that rely
-         * on IRQ disabling. See the comment near struct mmu_table_batch.
-         */
-        smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
-        __tlb_remove_table(table);
-}
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
-        struct mmu_table_batch *batch;
-        int i;
-        batch = container_of(head, struct mmu_table_batch, rcu);
-        for (i = 0; i < batch->nr; i++)
-                __tlb_remove_table(batch->tables[i]);
-        free_page((unsigned long)batch);
-}
-void tlb_table_flush(struct mmu_gather *tlb)
-{
-        struct mmu_table_batch **batch = &tlb->batch;
-        if (*batch) {
-                __tlb_flush_mm(tlb->mm);
-                call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
-                *batch = NULL;
-        }
-}
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-        struct mmu_table_batch **batch = &tlb->batch;
-        if (*batch == NULL) {
-                *batch = (struct mmu_table_batch *)
-                        __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-                if (*batch == NULL) {
-                        __tlb_flush_mm(tlb->mm);
-                        tlb_remove_table_one(table);
-                        return;
-                }
-                (*batch)->nr = 0;
-        }
-        (*batch)->tables[(*batch)->nr++] = table;
-        if ((*batch)->nr == MAX_TABLE_BATCH)
-                tlb_table_flush(tlb);
-}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void thp_split_vma(struct vm_area_struct *vma)
-{
-        unsigned long addr;
-        struct page *page;
-        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
-                page = follow_page(vma, addr, FOLL_SPLIT);
-        }
-}
-void thp_split_mm(struct mm_struct *mm)
-{
-        struct vm_area_struct *vma = mm->mmap;
-        while (vma != NULL) {
-                thp_split_vma(vma);
-                vma->vm_flags &= ~VM_HUGEPAGE;
-                vma->vm_flags |= VM_NOHUGEPAGE;
-                vma = vma->vm_next;
-        }
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 /*
 * switch on pgstes for its userspace process (for kvm)
@@ -820,7 +683,7 @@ int s390_enable_sie(void)
        struct mm_struct *mm, *old_mm;
        /* Do we have switched amode? If no, we cannot do sie */
-        if (s390_user_mode == HOME_SPACE_MODE)
+        if (user_mode == HOME_SPACE_MODE)
                return -EINVAL;
        /* Do we have pgstes? if yes, we are done */
@@ -841,19 +704,11 @@ int s390_enable_sie(void)
        /* we copy the mm and let dup_mm create the page tables with_pgstes */
        tsk->mm->context.alloc_pgste = 1;
-        /* make sure that both mms have a correct rss state */
-        sync_mm_rss(tsk->mm);
        mm = dup_mm(tsk);
        tsk->mm->context.alloc_pgste = 0;
        if (!mm)
                return -ENOMEM;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-        /* split thp mappings and disable thp for future mappings */
-        thp_split_mm(mm);
-        mm->def_flags |= VM_NOHUGEPAGE;
-#endif
        /* Now lets check again if something happened */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
@@ -881,80 +736,18 @@ int s390_enable_sie(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
-int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
+bool kernel_page_present(struct page *page)
-                           pmd_t *pmdp)
-{
-        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-        /* No need to flush TLB
-         * On s390 reference bits are in storage key and never in TLB */
-        return pmdp_test_and_clear_young(vma, address, pmdp);
-}
-int pmdp_set_access_flags(struct vm_area_struct *vma,
-                          unsigned long address, pmd_t *pmdp,
-                          pmd_t entry, int dirty)
 {
-        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+        unsigned long addr;
+        int cc;
-        if (pmd_same(*pmdp, entry))
-                return 0;
-        pmdp_invalidate(vma, address, pmdp);
-        set_pmd_at(vma->vm_mm, address, pmdp, entry);
-        return 1;
-}
-static void pmdp_splitting_flush_sync(void *arg)
-{
-        /* Simply deliver the interrupt */
-}
-void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
-                          pmd_t *pmdp)
-{
-        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-        if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
-                              (unsigned long *) pmdp)) {
-                /* need to serialize against gup-fast (IRQ disabled) */
-                smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
-        }
-}
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
-{
-        struct list_head *lh = (struct list_head *) pgtable;
-        assert_spin_locked(&mm->page_table_lock);
-        /* FIFO */
-        if (!mm->pmd_huge_pte)
-                INIT_LIST_HEAD(lh);
-        else
-                list_add(lh, (struct list_head *) mm->pmd_huge_pte);
-        mm->pmd_huge_pte = pgtable;
-}
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+        addr = page_to_phys(page);
-{
+        asm volatile(
-        struct list_head *lh;
+                "       lra     %1,0(%1)\n"
-        pgtable_t pgtable;
+                "       ipm     %0\n"
-        pte_t *ptep;
+                "       srl     %0,28"
+                : "=d" (cc), "+a" (addr) : : "cc");
-        assert_spin_locked(&mm->page_table_lock);
+        return cc == 0;
-        /* FIFO */
-        pgtable = mm->pmd_huge_pte;
-        lh = (struct list_head *) pgtable;
-        if (list_empty(lh))
-                mm->pmd_huge_pte = NULL;
-        else {
-                mm->pmd_huge_pte = (pgtable_t) lh->next;
-                list_del(lh);
-        }
-        ptep = (pte_t *) pgtable;
-        pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-        ptep++;
-        pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-        return pgtable;
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6ed1426d27c..781ff516956 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -1,4 +1,6 @@
 /*
+ *  arch/s390/mm/vmem.c
+ *
 *    Copyright IBM Corp. 2006
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */
@@ -79,8 +81,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
 */
 static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 {
-        unsigned long end = start + size;
+        unsigned long address;
-        unsigned long address = start;
        pgd_t *pg_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
@@ -88,8 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
        pte_t  pte;
        int ret = -ENOMEM;
-        while (address < end) {
+        for (address = start; address < start + size; address += PAGE_SIZE) {
-                pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        pu_dir = vmem_pud_alloc();
@@ -97,30 +97,25 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
                                goto out;
                        pgd_populate(&init_mm, pg_dir, pu_dir);
                }
                pu_dir = pud_offset(pg_dir, address);
-#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
-                if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
-                    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
-                        pte_val(pte) |= _REGION3_ENTRY_LARGE;
-                        pte_val(pte) |= _REGION_ENTRY_TYPE_R3;
-                        pud_val(*pu_dir) = pte_val(pte);
-                        address += PUD_SIZE;
-                        continue;
-                }
-#endif
                if (pud_none(*pu_dir)) {
                        pm_dir = vmem_pmd_alloc();
                        if (!pm_dir)
                                goto out;
                        pud_populate(&init_mm, pu_dir, pm_dir);
                }
+                pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
                pm_dir = pmd_offset(pu_dir, address);
-#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
-                if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
+#ifdef __s390x__
-                    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
+                if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
+                    (address + HPAGE_SIZE <= start + size) &&
+                    (address >= HPAGE_SIZE)) {
                        pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
                        pmd_val(*pm_dir) = pte_val(pte);
-                        address += PMD_SIZE;
+                        address += HPAGE_SIZE - PAGE_SIZE;
                        continue;
                }
 #endif
@@ -133,11 +128,10 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
                pt_dir = pte_offset_kernel(pm_dir, address);
                *pt_dir = pte;
-                address += PAGE_SIZE;
        }
        ret = 0;
 out:
-        flush_tlb_kernel_range(start, end);
+        flush_tlb_kernel_range(start, start + size);
        return ret;
 }
@@ -147,8 +141,7 @@ out:
 */
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
-        unsigned long end = start + size;
+        unsigned long address;
-        unsigned long address = start;
        pgd_t *pg_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
@@ -156,37 +149,25 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
        pte_t  pte;
        pte_val(pte) = _PAGE_TYPE_EMPTY;
-        while (address < end) {
+        for (address = start; address < start + size; address += PAGE_SIZE) {
                pg_dir = pgd_offset_k(address);
-                if (pgd_none(*pg_dir)) {
-                        address += PGDIR_SIZE;
-                        continue;
-                }
                pu_dir = pud_offset(pg_dir, address);
-                if (pud_none(*pu_dir)) {
+                if (pud_none(*pu_dir))
-                        address += PUD_SIZE;
                        continue;
-                }
-                if (pud_large(*pu_dir)) {
-                        pud_clear(pu_dir);
-                        address += PUD_SIZE;
-                        continue;
-                }
                pm_dir = pmd_offset(pu_dir, address);
-                if (pmd_none(*pm_dir)) {
+                if (pmd_none(*pm_dir))
-                        address += PMD_SIZE;
                        continue;
-                }
-                if (pmd_large(*pm_dir)) {
+                if (pmd_huge(*pm_dir)) {
                        pmd_clear(pm_dir);
-                        address += PMD_SIZE;
+                        address += HPAGE_SIZE - PAGE_SIZE;
                        continue;
                }
                pt_dir = pte_offset_kernel(pm_dir, address);
                *pt_dir = pte;
-                address += PAGE_SIZE;
        }
-        flush_tlb_kernel_range(start, end);
+        flush_tlb_kernel_range(start, start + size);
 }
 /*
@@ -205,7 +186,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
        start_addr = (unsigned long) start;
        end_addr = (unsigned long) (start + nr);
-        for (address = start_addr; address < end_addr;) {
+        for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        pu_dir = vmem_pud_alloc();
@@ -224,33 +205,10 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
                pm_dir = pmd_offset(pu_dir, address);
                if (pmd_none(*pm_dir)) {
-#ifdef CONFIG_64BIT
-                        /* Use 1MB frames for vmemmap if available. We always
-                         * use large frames even if they are only partially
-                         * used.
-                         * Otherwise we would have also page tables since
-                         * vmemmap_populate gets called for each section
-                         * separately. */
-                        if (MACHINE_HAS_EDAT1) {
-                                void *new_page;
-                                new_page = vmemmap_alloc_block(PMD_SIZE, node);
-                                if (!new_page)
-                                        goto out;
-                                pte = mk_pte_phys(__pa(new_page), PAGE_RW);
-                                pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
-                                pmd_val(*pm_dir) = pte_val(pte);
-                                address = (address + PMD_SIZE) & PMD_MASK;
-                                continue;
-                        }
-#endif
                        pt_dir = vmem_pte_alloc(address);
                        if (!pt_dir)
                                goto out;
                        pmd_populate(&init_mm, pm_dir, pt_dir);
-                } else if (pmd_large(*pm_dir)) {
-                        address = (address + PMD_SIZE) & PMD_MASK;
-                        continue;
                }
                pt_dir = pte_offset_kernel(pm_dir, address);
@@ -263,7 +221,6 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
                        pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
                        *pt_dir = pte;
                }
-                address += PAGE_SIZE;
        }
        memset(start, 0, nr * sizeof(struct page));
        ret = 0;
@@ -375,12 +332,9 @@ void __init vmem_map_init(void)
        unsigned long start, end;
        int i;
-        ro_start = PFN_ALIGN((unsigned long)&_stext);
+        ro_start = ((unsigned long)&_stext) & PAGE_MASK;
-        ro_end = (unsigned long)&_eshared & PAGE_MASK;
+        ro_end = PFN_ALIGN((unsigned long)&_eshared);
        for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
-                if (memory_chunk[i].type == CHUNK_CRASHK ||
-                    memory_chunk[i].type == CHUNK_OLDMEM)
-                        continue;
                start = memory_chunk[i].addr;
                end = memory_chunk[i].addr + memory_chunk[i].size;
                if (start >= ro_end || end <= ro_start)
@@ -414,9 +368,6 @@ static int __init vmem_convert_memory_chunk(void)
        for (i = 0; i < MEMORY_CHUNKS; i++) {
                if (!memory_chunk[i].size)
                        continue;
-                if (memory_chunk[i].type == CHUNK_CRASHK ||
-                    memory_chunk[i].type == CHUNK_OLDMEM)
-                        continue;
                seg = kzalloc(sizeof(*seg), GFP_KERNEL);
                if (!seg)
                        panic("Out of memory...\n");
author	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
commit	8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree	a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/s390/mm
parent	406089d01562f1e2bf9f089fd7637009ebaad589 (diff)