aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/mm
diff options
context:
space:
mode:
authorJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
committerJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
commit8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
treea8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/s390/mm
parent406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/Makefile11
-rw-r--r--arch/s390/mm/cmm.c2
-rw-r--r--arch/s390/mm/dump_pagetables.c231
-rw-r--r--arch/s390/mm/extable.c81
-rw-r--r--arch/s390/mm/extmem.c3
-rw-r--r--arch/s390/mm/fault.c246
-rw-r--r--arch/s390/mm/gup.c51
-rw-r--r--arch/s390/mm/hugetlbpage.c4
-rw-r--r--arch/s390/mm/init.c86
-rw-r--r--arch/s390/mm/maccess.c152
-rw-r--r--arch/s390/mm/mmap.c19
-rw-r--r--arch/s390/mm/pageattr.c121
-rw-r--r--arch/s390/mm/pgtable.c287
-rw-r--r--arch/s390/mm/vmem.c101
14 files changed, 271 insertions, 1124 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 640bea12303..d98fe9004a5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,9 +2,8 @@
2# Makefile for the linux s390-specific parts of the memory manager. 2# Makefile for the linux s390-specific parts of the memory manager.
3# 3#
4 4
5obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o 5obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
6obj-y += page-states.o gup.o extable.o pageattr.o 6 page-states.o gup.o
7 7obj-$(CONFIG_CMM) += cmm.o
8obj-$(CONFIG_CMM) += cmm.o 8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
9obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 9obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o
10obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 479e9428291..1f1dba9dcf5 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Collaborative memory management interface. 2 * Collaborative memory management interface.
3 * 3 *
4 * Copyright IBM Corp 2003, 2010 4 * Copyright IBM Corp 2003,2010
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, 5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
6 * 6 *
7 */ 7 */
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
deleted file mode 100644
index 04e4892247d..00000000000
--- a/arch/s390/mm/dump_pagetables.c
+++ /dev/null
@@ -1,231 +0,0 @@
1#include <linux/seq_file.h>
2#include <linux/debugfs.h>
3#include <linux/module.h>
4#include <linux/mm.h>
5#include <asm/sections.h>
6#include <asm/pgtable.h>
7
/*
 * Exclusive upper bound of the address range walked by the page table
 * dumper; assigned once in pt_dump_init() and checked by every
 * walk_*_level() loop.
 */
static unsigned long max_addr;
9
/*
 * Names a region of the address space. note_page() prints the name as a
 * heading once the walk reaches start_address.
 *
 * Field order matters: address_markers[] may be initialised positionally.
 */
struct addr_marker {
	unsigned long start_address;	/* first address belonging to the region */
	const char *name;		/* heading text printed for the region */
};
14
/* Slot numbers of the entries in address_markers[]. */
enum address_markers_idx {
	IDENTITY_NR	= 0,
	KERNEL_START_NR	= 1,
	KERNEL_END_NR	= 2,
	VMEMMAP_NR	= 3,
	VMALLOC_NR	= 4,
#ifdef CONFIG_64BIT
	MODULES_NR	= 5,
#endif
};
25
26static struct addr_marker address_markers[] = {
27 [IDENTITY_NR] = {0, "Identity Mapping"},
28 [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
29 [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"},
30 [VMEMMAP_NR] = {0, "vmemmap Area"},
31 [VMALLOC_NR] = {0, "vmalloc Area"},
32#ifdef CONFIG_64BIT
33 [MODULES_NR] = {0, "Modules Area"},
34#endif
35 { -1, NULL }
36};
37
/*
 * Running state of one page table dump: describes the series of entries
 * with equal attributes that note_page() is currently accumulating.
 */
struct pg_state {
	int level;			/* table level of the series; 0 = nothing seen yet */
	unsigned int current_prot;	/* protection bits shared by the series */
	unsigned long start_address;	/* address at which the series began */
	unsigned long current_address;	/* address currently being visited */
	const struct addr_marker *marker; /* region heading printed last */
};
45
46static void print_prot(struct seq_file *m, unsigned int pr, int level)
47{
48 static const char * const level_name[] =
49 { "ASCE", "PGD", "PUD", "PMD", "PTE" };
50
51 seq_printf(m, "%s ", level_name[level]);
52 if (pr & _PAGE_INVALID)
53 seq_printf(m, "I\n");
54 else
55 seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW");
56}
57
58static void note_page(struct seq_file *m, struct pg_state *st,
59 unsigned int new_prot, int level)
60{
61 static const char units[] = "KMGTPE";
62 int width = sizeof(unsigned long) * 2;
63 const char *unit = units;
64 unsigned int prot, cur;
65 unsigned long delta;
66
67 /*
68 * If we have a "break" in the series, we need to flush the state
69 * that we have now. "break" is either changing perms, levels or
70 * address space marker.
71 */
72 prot = new_prot;
73 cur = st->current_prot;
74
75 if (!st->level) {
76 /* First entry */
77 st->current_prot = new_prot;
78 st->level = level;
79 st->marker = address_markers;
80 seq_printf(m, "---[ %s ]---\n", st->marker->name);
81 } else if (prot != cur || level != st->level ||
82 st->current_address >= st->marker[1].start_address) {
83 /* Print the actual finished series */
84 seq_printf(m, "0x%0*lx-0x%0*lx",
85 width, st->start_address,
86 width, st->current_address);
87 delta = (st->current_address - st->start_address) >> 10;
88 while (!(delta & 0x3ff) && unit[1]) {
89 delta >>= 10;
90 unit++;
91 }
92 seq_printf(m, "%9lu%c ", delta, *unit);
93 print_prot(m, st->current_prot, st->level);
94 if (st->current_address >= st->marker[1].start_address) {
95 st->marker++;
96 seq_printf(m, "---[ %s ]---\n", st->marker->name);
97 }
98 st->start_address = st->current_address;
99 st->current_prot = new_prot;
100 st->level = level;
101 }
102}
103
104/*
105 * The actual page table walker functions. In order to keep the implementation
106 * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
107 * flags to note_page() if a region, segment or page table entry is invalid or
108 * read-only.
109 * After all it's just a hint that the current level being walked contains an
110 * invalid or read-only entry.
111 */
112static void walk_pte_level(struct seq_file *m, struct pg_state *st,
113 pmd_t *pmd, unsigned long addr)
114{
115 unsigned int prot;
116 pte_t *pte;
117 int i;
118
119 for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
120 st->current_address = addr;
121 pte = pte_offset_kernel(pmd, addr);
122 prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
123 note_page(m, st, prot, 4);
124 addr += PAGE_SIZE;
125 }
126}
127
128static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
129 pud_t *pud, unsigned long addr)
130{
131 unsigned int prot;
132 pmd_t *pmd;
133 int i;
134
135 for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
136 st->current_address = addr;
137 pmd = pmd_offset(pud, addr);
138 if (!pmd_none(*pmd)) {
139 if (pmd_large(*pmd)) {
140 prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO;
141 note_page(m, st, prot, 3);
142 } else
143 walk_pte_level(m, st, pmd, addr);
144 } else
145 note_page(m, st, _PAGE_INVALID, 3);
146 addr += PMD_SIZE;
147 }
148}
149
150static void walk_pud_level(struct seq_file *m, struct pg_state *st,
151 pgd_t *pgd, unsigned long addr)
152{
153 unsigned int prot;
154 pud_t *pud;
155 int i;
156
157 for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
158 st->current_address = addr;
159 pud = pud_offset(pgd, addr);
160 if (!pud_none(*pud))
161 if (pud_large(*pud)) {
162 prot = pud_val(*pud) & _PAGE_RO;
163 note_page(m, st, prot, 2);
164 } else
165 walk_pmd_level(m, st, pud, addr);
166 else
167 note_page(m, st, _PAGE_INVALID, 2);
168 addr += PUD_SIZE;
169 }
170}
171
172static void walk_pgd_level(struct seq_file *m)
173{
174 unsigned long addr = 0;
175 struct pg_state st;
176 pgd_t *pgd;
177 int i;
178
179 memset(&st, 0, sizeof(st));
180 for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
181 st.current_address = addr;
182 pgd = pgd_offset_k(addr);
183 if (!pgd_none(*pgd))
184 walk_pud_level(m, &st, pgd, addr);
185 else
186 note_page(m, &st, _PAGE_INVALID, 1);
187 addr += PGDIR_SIZE;
188 }
189 /* Flush out the last page */
190 st.current_address = max_addr;
191 note_page(m, &st, 0, 0);
192}
193
/*
 * seq_file show callback: dump the kernel page tables into @m.
 * @v is unused. Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);

	return 0;
}
199
200static int ptdump_open(struct inode *inode, struct file *filp)
201{
202 return single_open(filp, ptdump_show, NULL);
203}
204
205static const struct file_operations ptdump_fops = {
206 .open = ptdump_open,
207 .read = seq_read,
208 .llseek = seq_lseek,
209 .release = single_release,
210};
211
212static int pt_dump_init(void)
213{
214 /*
215 * Figure out the maximum virtual address being accessible with the
216 * kernel ASCE. We need this to keep the page table walker functions
217 * from accessing non-existent entries.
218 */
219#ifdef CONFIG_32BIT
220 max_addr = 1UL << 31;
221#else
222 max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
223 max_addr = 1UL << (max_addr * 11 + 31);
224 address_markers[MODULES_NR].start_address = MODULES_VADDR;
225#endif
226 address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
227 address_markers[VMALLOC_NR].start_address = VMALLOC_START;
228 debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
229 return 0;
230}
231device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
deleted file mode 100644
index 4d1ee88864e..00000000000
--- a/arch/s390/mm/extable.c
+++ /dev/null
@@ -1,81 +0,0 @@
1#include <linux/module.h>
2#include <linux/sort.h>
3#include <asm/uaccess.h>
4
5/*
6 * Search one exception table for an entry corresponding to the
7 * given instruction address, and return the address of the entry,
8 * or NULL if none is found.
9 * We use a binary search, and thus we assume that the table is
10 * already sorted.
11 */
12const struct exception_table_entry *
13search_extable(const struct exception_table_entry *first,
14 const struct exception_table_entry *last,
15 unsigned long value)
16{
17 const struct exception_table_entry *mid;
18 unsigned long addr;
19
20 while (first <= last) {
21 mid = ((last - first) >> 1) + first;
22 addr = extable_insn(mid);
23 if (addr < value)
24 first = mid + 1;
25 else if (addr > value)
26 last = mid - 1;
27 else
28 return mid;
29 }
30 return NULL;
31}
32
33/*
34 * The exception table needs to be sorted so that the binary
35 * search that we use to find entries in it works properly.
36 * This is used both for the kernel exception table and for
37 * the exception tables of modules that get loaded.
38 *
39 */
40static int cmp_ex(const void *a, const void *b)
41{
42 const struct exception_table_entry *x = a, *y = b;
43
44 /* This compare is only valid after normalization. */
45 return x->insn - y->insn;
46}
47
48void sort_extable(struct exception_table_entry *start,
49 struct exception_table_entry *finish)
50{
51 struct exception_table_entry *p;
52 int i;
53
54 /* Normalize entries to being relative to the start of the section */
55 for (p = start, i = 0; p < finish; p++, i += 8)
56 p->insn += i;
57 sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
58 /* Denormalize all entries */
59 for (p = start, i = 0; p < finish; p++, i += 8)
60 p->insn -= i;
61}
62
#ifdef CONFIG_MODULES
/*
 * Drop the exception table entries of @m that refer to the module's
 * init section. With a sorted table those entries can only sit at the
 * very beginning or the very end, so both ends are trimmed until an
 * entry outside the init section is found.
 */
void trim_init_extable(struct module *m)
{
	/* Trim the beginning */
	while (m->num_exentries) {
		if (!within_module_init(extable_insn(&m->extable[0]), m))
			break;
		m->extable++;
		m->num_exentries--;
	}

	/* Trim the end */
	for (; m->num_exentries; m->num_exentries--) {
		if (!within_module_init(
				extable_insn(&m->extable[m->num_exentries-1]),
				m))
			break;
	}
}
#endif /* CONFIG_MODULES */
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 519bba716cc..075ddada491 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -1,9 +1,10 @@
1/* 1/*
2 * File...........: arch/s390/mm/extmem.c
2 * Author(s)......: Carsten Otte <cotte@de.ibm.com> 3 * Author(s)......: Carsten Otte <cotte@de.ibm.com>
3 * Rob M van der Heij <rvdheij@nl.ibm.com> 4 * Rob M van der Heij <rvdheij@nl.ibm.com>
4 * Steven Shultz <shultzss@us.ibm.com> 5 * Steven Shultz <shultzss@us.ibm.com>
5 * Bugreports.to..: <Linux390@de.ibm.com> 6 * Bugreports.to..: <Linux390@de.ibm.com>
6 * Copyright IBM Corp. 2002, 2004 7 * (C) IBM Corporation 2002-2004
7 */ 8 */
8 9
9#define KMSG_COMPONENT "extmem" 10#define KMSG_COMPONENT "extmem"
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2fb9e63b8fc..9564fc779b2 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -1,6 +1,8 @@
1/* 1/*
2 * arch/s390/mm/fault.c
3 *
2 * S390 version 4 * S390 version
3 * Copyright IBM Corp. 1999 5 * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
4 * Author(s): Hartmut Penner (hp@de.ibm.com) 6 * Author(s): Hartmut Penner (hp@de.ibm.com)
5 * Ulrich Weigand (uweigand@de.ibm.com) 7 * Ulrich Weigand (uweigand@de.ibm.com)
6 * 8 *
@@ -30,10 +32,11 @@
30#include <linux/uaccess.h> 32#include <linux/uaccess.h>
31#include <linux/hugetlb.h> 33#include <linux/hugetlb.h>
32#include <asm/asm-offsets.h> 34#include <asm/asm-offsets.h>
35#include <asm/system.h>
33#include <asm/pgtable.h> 36#include <asm/pgtable.h>
34#include <asm/irq.h> 37#include <asm/irq.h>
35#include <asm/mmu_context.h> 38#include <asm/mmu_context.h>
36#include <asm/facility.h> 39#include <asm/compat.h>
37#include "../kernel/entry.h" 40#include "../kernel/entry.h"
38 41
39#ifndef CONFIG_64BIT 42#ifndef CONFIG_64BIT
@@ -49,19 +52,14 @@
49#define VM_FAULT_BADCONTEXT 0x010000 52#define VM_FAULT_BADCONTEXT 0x010000
50#define VM_FAULT_BADMAP 0x020000 53#define VM_FAULT_BADMAP 0x020000
51#define VM_FAULT_BADACCESS 0x040000 54#define VM_FAULT_BADACCESS 0x040000
52#define VM_FAULT_SIGNAL 0x080000
53 55
54static unsigned long store_indication __read_mostly; 56static unsigned long store_indication;
55 57
56#ifdef CONFIG_64BIT 58void fault_init(void)
57static int __init fault_init(void)
58{ 59{
59 if (test_facility(75)) 60 if (test_facility(2) && test_facility(75))
60 store_indication = 0xc00; 61 store_indication = 0xc00;
61 return 0;
62} 62}
63early_initcall(fault_init);
64#endif
65 63
66static inline int notify_page_fault(struct pt_regs *regs) 64static inline int notify_page_fault(struct pt_regs *regs)
67{ 65{
@@ -115,7 +113,7 @@ static inline int user_space_fault(unsigned long trans_exc_code)
115 if (trans_exc_code == 2) 113 if (trans_exc_code == 2)
116 /* Access via secondary space, set_fs setting decides */ 114 /* Access via secondary space, set_fs setting decides */
117 return current->thread.mm_segment.ar4; 115 return current->thread.mm_segment.ar4;
118 if (s390_user_mode == HOME_SPACE_MODE) 116 if (user_mode == HOME_SPACE_MODE)
119 /* User space if the access has been done via home space. */ 117 /* User space if the access has been done via home space. */
120 return trans_exc_code == 3; 118 return trans_exc_code == 3;
121 /* 119 /*
@@ -127,7 +125,8 @@ static inline int user_space_fault(unsigned long trans_exc_code)
127 return trans_exc_code != 3; 125 return trans_exc_code != 3;
128} 126}
129 127
130static inline void report_user_fault(struct pt_regs *regs, long signr) 128static inline void report_user_fault(struct pt_regs *regs, long int_code,
129 int signr, unsigned long address)
131{ 130{
132 if ((task_pid_nr(current) > 1) && !show_unhandled_signals) 131 if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
133 return; 132 return;
@@ -135,12 +134,10 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
135 return; 134 return;
136 if (!printk_ratelimit()) 135 if (!printk_ratelimit())
137 return; 136 return;
138 printk(KERN_ALERT "User process fault: interruption code 0x%X ", 137 printk("User process fault: interruption code 0x%lX ", int_code);
139 regs->int_code);
140 print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); 138 print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
141 printk(KERN_CONT "\n"); 139 printk("\n");
142 printk(KERN_ALERT "failing address: %lX\n", 140 printk("failing address: %lX\n", address);
143 regs->int_parm_long & __FAIL_ADDR_MASK);
144 show_regs(regs); 141 show_regs(regs);
145} 142}
146 143
@@ -148,18 +145,24 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
148 * Send SIGSEGV to task. This is an external routine 145 * Send SIGSEGV to task. This is an external routine
149 * to keep the stack usage of do_page_fault small. 146 * to keep the stack usage of do_page_fault small.
150 */ 147 */
151static noinline void do_sigsegv(struct pt_regs *regs, int si_code) 148static noinline void do_sigsegv(struct pt_regs *regs, long int_code,
149 int si_code, unsigned long trans_exc_code)
152{ 150{
153 struct siginfo si; 151 struct siginfo si;
152 unsigned long address;
154 153
155 report_user_fault(regs, SIGSEGV); 154 address = trans_exc_code & __FAIL_ADDR_MASK;
155 current->thread.prot_addr = address;
156 current->thread.trap_no = int_code;
157 report_user_fault(regs, int_code, SIGSEGV, address);
156 si.si_signo = SIGSEGV; 158 si.si_signo = SIGSEGV;
157 si.si_code = si_code; 159 si.si_code = si_code;
158 si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); 160 si.si_addr = (void __user *) address;
159 force_sig_info(SIGSEGV, &si, current); 161 force_sig_info(SIGSEGV, &si, current);
160} 162}
161 163
162static noinline void do_no_context(struct pt_regs *regs) 164static noinline void do_no_context(struct pt_regs *regs, long int_code,
165 unsigned long trans_exc_code)
163{ 166{
164 const struct exception_table_entry *fixup; 167 const struct exception_table_entry *fixup;
165 unsigned long address; 168 unsigned long address;
@@ -167,7 +170,7 @@ static noinline void do_no_context(struct pt_regs *regs)
167 /* Are we prepared to handle this kernel fault? */ 170 /* Are we prepared to handle this kernel fault? */
168 fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); 171 fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
169 if (fixup) { 172 if (fixup) {
170 regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; 173 regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
171 return; 174 return;
172 } 175 }
173 176
@@ -175,48 +178,55 @@ static noinline void do_no_context(struct pt_regs *regs)
175 * Oops. The kernel tried to access some bad page. We'll have to 178 * Oops. The kernel tried to access some bad page. We'll have to
176 * terminate things with extreme prejudice. 179 * terminate things with extreme prejudice.
177 */ 180 */
178 address = regs->int_parm_long & __FAIL_ADDR_MASK; 181 address = trans_exc_code & __FAIL_ADDR_MASK;
179 if (!user_space_fault(regs->int_parm_long)) 182 if (!user_space_fault(trans_exc_code))
180 printk(KERN_ALERT "Unable to handle kernel pointer dereference" 183 printk(KERN_ALERT "Unable to handle kernel pointer dereference"
181 " at virtual kernel address %p\n", (void *)address); 184 " at virtual kernel address %p\n", (void *)address);
182 else 185 else
183 printk(KERN_ALERT "Unable to handle kernel paging request" 186 printk(KERN_ALERT "Unable to handle kernel paging request"
184 " at virtual user address %p\n", (void *)address); 187 " at virtual user address %p\n", (void *)address);
185 188
186 die(regs, "Oops"); 189 die("Oops", regs, int_code);
187 do_exit(SIGKILL); 190 do_exit(SIGKILL);
188} 191}
189 192
190static noinline void do_low_address(struct pt_regs *regs) 193static noinline void do_low_address(struct pt_regs *regs, long int_code,
194 unsigned long trans_exc_code)
191{ 195{
192 /* Low-address protection hit in kernel mode means 196 /* Low-address protection hit in kernel mode means
193 NULL pointer write access in kernel mode. */ 197 NULL pointer write access in kernel mode. */
194 if (regs->psw.mask & PSW_MASK_PSTATE) { 198 if (regs->psw.mask & PSW_MASK_PSTATE) {
195 /* Low-address protection hit in user mode 'cannot happen'. */ 199 /* Low-address protection hit in user mode 'cannot happen'. */
196 die (regs, "Low-address protection"); 200 die ("Low-address protection", regs, int_code);
197 do_exit(SIGKILL); 201 do_exit(SIGKILL);
198 } 202 }
199 203
200 do_no_context(regs); 204 do_no_context(regs, int_code, trans_exc_code);
201} 205}
202 206
203static noinline void do_sigbus(struct pt_regs *regs) 207static noinline void do_sigbus(struct pt_regs *regs, long int_code,
208 unsigned long trans_exc_code)
204{ 209{
205 struct task_struct *tsk = current; 210 struct task_struct *tsk = current;
211 unsigned long address;
206 struct siginfo si; 212 struct siginfo si;
207 213
208 /* 214 /*
209 * Send a sigbus, regardless of whether we were in kernel 215 * Send a sigbus, regardless of whether we were in kernel
210 * or user mode. 216 * or user mode.
211 */ 217 */
218 address = trans_exc_code & __FAIL_ADDR_MASK;
219 tsk->thread.prot_addr = address;
220 tsk->thread.trap_no = int_code;
212 si.si_signo = SIGBUS; 221 si.si_signo = SIGBUS;
213 si.si_errno = 0; 222 si.si_errno = 0;
214 si.si_code = BUS_ADRERR; 223 si.si_code = BUS_ADRERR;
215 si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); 224 si.si_addr = (void __user *) address;
216 force_sig_info(SIGBUS, &si, tsk); 225 force_sig_info(SIGBUS, &si, tsk);
217} 226}
218 227
219static noinline void do_fault_error(struct pt_regs *regs, int fault) 228static noinline void do_fault_error(struct pt_regs *regs, long int_code,
229 unsigned long trans_exc_code, int fault)
220{ 230{
221 int si_code; 231 int si_code;
222 232
@@ -224,32 +234,28 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
224 case VM_FAULT_BADACCESS: 234 case VM_FAULT_BADACCESS:
225 case VM_FAULT_BADMAP: 235 case VM_FAULT_BADMAP:
226 /* Bad memory access. Check if it is kernel or user space. */ 236 /* Bad memory access. Check if it is kernel or user space. */
227 if (user_mode(regs)) { 237 if (regs->psw.mask & PSW_MASK_PSTATE) {
228 /* User mode accesses just cause a SIGSEGV */ 238 /* User mode accesses just cause a SIGSEGV */
229 si_code = (fault == VM_FAULT_BADMAP) ? 239 si_code = (fault == VM_FAULT_BADMAP) ?
230 SEGV_MAPERR : SEGV_ACCERR; 240 SEGV_MAPERR : SEGV_ACCERR;
231 do_sigsegv(regs, si_code); 241 do_sigsegv(regs, int_code, si_code, trans_exc_code);
232 return; 242 return;
233 } 243 }
234 case VM_FAULT_BADCONTEXT: 244 case VM_FAULT_BADCONTEXT:
235 do_no_context(regs); 245 do_no_context(regs, int_code, trans_exc_code);
236 break;
237 case VM_FAULT_SIGNAL:
238 if (!user_mode(regs))
239 do_no_context(regs);
240 break; 246 break;
241 default: /* fault & VM_FAULT_ERROR */ 247 default: /* fault & VM_FAULT_ERROR */
242 if (fault & VM_FAULT_OOM) { 248 if (fault & VM_FAULT_OOM) {
243 if (!user_mode(regs)) 249 if (!(regs->psw.mask & PSW_MASK_PSTATE))
244 do_no_context(regs); 250 do_no_context(regs, int_code, trans_exc_code);
245 else 251 else
246 pagefault_out_of_memory(); 252 pagefault_out_of_memory();
247 } else if (fault & VM_FAULT_SIGBUS) { 253 } else if (fault & VM_FAULT_SIGBUS) {
248 /* Kernel mode? Handle exceptions or die */ 254 /* Kernel mode? Handle exceptions or die */
249 if (!user_mode(regs)) 255 if (!(regs->psw.mask & PSW_MASK_PSTATE))
250 do_no_context(regs); 256 do_no_context(regs, int_code, trans_exc_code);
251 else 257 else
252 do_sigbus(regs); 258 do_sigbus(regs, int_code, trans_exc_code);
253 } else 259 } else
254 BUG(); 260 BUG();
255 break; 261 break;
@@ -267,28 +273,21 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
267 * 11 Page translation -> Not present (nullification) 273 * 11 Page translation -> Not present (nullification)
268 * 3b Region third trans. -> Not present (nullification) 274 * 3b Region third trans. -> Not present (nullification)
269 */ 275 */
270static inline int do_exception(struct pt_regs *regs, int access) 276static inline int do_exception(struct pt_regs *regs, int access,
277 unsigned long trans_exc_code)
271{ 278{
272 struct task_struct *tsk; 279 struct task_struct *tsk;
273 struct mm_struct *mm; 280 struct mm_struct *mm;
274 struct vm_area_struct *vma; 281 struct vm_area_struct *vma;
275 unsigned long trans_exc_code;
276 unsigned long address; 282 unsigned long address;
277 unsigned int flags; 283 unsigned int flags;
278 int fault; 284 int fault;
279 285
280 tsk = current;
281 /*
282 * The instruction that caused the program check has
283 * been nullified. Don't signal single step via SIGTRAP.
284 */
285 clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
286
287 if (notify_page_fault(regs)) 286 if (notify_page_fault(regs))
288 return 0; 287 return 0;
289 288
289 tsk = current;
290 mm = tsk->mm; 290 mm = tsk->mm;
291 trans_exc_code = regs->int_parm_long;
292 291
293 /* 292 /*
294 * Verify that the fault happened in user space, that 293 * Verify that the fault happened in user space, that
@@ -301,14 +300,14 @@ static inline int do_exception(struct pt_regs *regs, int access)
301 300
302 address = trans_exc_code & __FAIL_ADDR_MASK; 301 address = trans_exc_code & __FAIL_ADDR_MASK;
303 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 302 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
304 flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 303 flags = FAULT_FLAG_ALLOW_RETRY;
305 if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) 304 if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
306 flags |= FAULT_FLAG_WRITE; 305 flags |= FAULT_FLAG_WRITE;
307 down_read(&mm->mmap_sem); 306 down_read(&mm->mmap_sem);
308 307
309#ifdef CONFIG_PGSTE 308#ifdef CONFIG_PGSTE
310 if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { 309 if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
311 address = __gmap_fault(address, 310 address = gmap_fault(address,
312 (struct gmap *) S390_lowcore.gmap); 311 (struct gmap *) S390_lowcore.gmap);
313 if (address == -EFAULT) { 312 if (address == -EFAULT) {
314 fault = VM_FAULT_BADMAP; 313 fault = VM_FAULT_BADMAP;
@@ -350,11 +349,6 @@ retry:
350 * the fault. 349 * the fault.
351 */ 350 */
352 fault = handle_mm_fault(mm, vma, address, flags); 351 fault = handle_mm_fault(mm, vma, address, flags);
353 /* No reason to continue if interrupted by SIGKILL. */
354 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
355 fault = VM_FAULT_SIGNAL;
356 goto out;
357 }
358 if (unlikely(fault & VM_FAULT_ERROR)) 352 if (unlikely(fault & VM_FAULT_ERROR))
359 goto out_up; 353 goto out_up;
360 354
@@ -377,11 +371,15 @@ retry:
377 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 371 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
378 * of starvation. */ 372 * of starvation. */
379 flags &= ~FAULT_FLAG_ALLOW_RETRY; 373 flags &= ~FAULT_FLAG_ALLOW_RETRY;
380 flags |= FAULT_FLAG_TRIED;
381 down_read(&mm->mmap_sem); 374 down_read(&mm->mmap_sem);
382 goto retry; 375 goto retry;
383 } 376 }
384 } 377 }
378 /*
379 * The instruction that caused the program check will
380 * be repeated. Don't signal single step via SIGTRAP.
381 */
382 clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
385 fault = 0; 383 fault = 0;
386out_up: 384out_up:
387 up_read(&mm->mmap_sem); 385 up_read(&mm->mmap_sem);
@@ -389,52 +387,45 @@ out:
389 return fault; 387 return fault;
390} 388}
391 389
392void __kprobes do_protection_exception(struct pt_regs *regs) 390void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code,
391 unsigned long trans_exc_code)
393{ 392{
394 unsigned long trans_exc_code;
395 int fault; 393 int fault;
396 394
397 trans_exc_code = regs->int_parm_long;
398 /* Protection exception is suppressing, decrement psw address. */ 395 /* Protection exception is suppressing, decrement psw address. */
399 regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); 396 regs->psw.addr -= (pgm_int_code >> 16);
400 /* 397 /*
401 * Check for low-address protection. This needs to be treated 398 * Check for low-address protection. This needs to be treated
402 * as a special case because the translation exception code 399 * as a special case because the translation exception code
403 * field is not guaranteed to contain valid data in this case. 400 * field is not guaranteed to contain valid data in this case.
404 */ 401 */
405 if (unlikely(!(trans_exc_code & 4))) { 402 if (unlikely(!(trans_exc_code & 4))) {
406 do_low_address(regs); 403 do_low_address(regs, pgm_int_code, trans_exc_code);
407 return; 404 return;
408 } 405 }
409 fault = do_exception(regs, VM_WRITE); 406 fault = do_exception(regs, VM_WRITE, trans_exc_code);
410 if (unlikely(fault)) 407 if (unlikely(fault))
411 do_fault_error(regs, fault); 408 do_fault_error(regs, 4, trans_exc_code, fault);
412} 409}
413 410
414void __kprobes do_dat_exception(struct pt_regs *regs) 411void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code,
412 unsigned long trans_exc_code)
415{ 413{
416 int access, fault; 414 int access, fault;
417 415
418 access = VM_READ | VM_EXEC | VM_WRITE; 416 access = VM_READ | VM_EXEC | VM_WRITE;
419 fault = do_exception(regs, access); 417 fault = do_exception(regs, access, trans_exc_code);
420 if (unlikely(fault)) 418 if (unlikely(fault))
421 do_fault_error(regs, fault); 419 do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault);
422} 420}
423 421
424#ifdef CONFIG_64BIT 422#ifdef CONFIG_64BIT
425void __kprobes do_asce_exception(struct pt_regs *regs) 423void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code,
424 unsigned long trans_exc_code)
426{ 425{
427 struct mm_struct *mm = current->mm; 426 struct mm_struct *mm = current->mm;
428 struct vm_area_struct *vma; 427 struct vm_area_struct *vma;
429 unsigned long trans_exc_code;
430 428
431 /*
432 * The instruction that caused the program check has
433 * been nullified. Don't signal single step via SIGTRAP.
434 */
435 clear_tsk_thread_flag(current, TIF_PER_TRAP);
436
437 trans_exc_code = regs->int_parm_long;
438 if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) 429 if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
439 goto no_context; 430 goto no_context;
440 431
@@ -448,13 +439,13 @@ void __kprobes do_asce_exception(struct pt_regs *regs)
448 } 439 }
449 440
450 /* User mode accesses just cause a SIGSEGV */ 441 /* User mode accesses just cause a SIGSEGV */
451 if (user_mode(regs)) { 442 if (regs->psw.mask & PSW_MASK_PSTATE) {
452 do_sigsegv(regs, SEGV_MAPERR); 443 do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code);
453 return; 444 return;
454 } 445 }
455 446
456no_context: 447no_context:
457 do_no_context(regs); 448 do_no_context(regs, pgm_int_code, trans_exc_code);
458} 449}
459#endif 450#endif
460 451
@@ -463,22 +454,20 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
463 struct pt_regs regs; 454 struct pt_regs regs;
464 int access, fault; 455 int access, fault;
465 456
466 /* Emulate a uaccess fault from kernel mode. */ 457 regs.psw.mask = psw_kernel_bits;
467 regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
468 if (!irqs_disabled()) 458 if (!irqs_disabled())
469 regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; 459 regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
470 regs.psw.addr = (unsigned long) __builtin_return_address(0); 460 regs.psw.addr = (unsigned long) __builtin_return_address(0);
471 regs.psw.addr |= PSW_ADDR_AMODE; 461 regs.psw.addr |= PSW_ADDR_AMODE;
472 regs.int_code = pgm_int_code; 462 uaddr &= PAGE_MASK;
473 regs.int_parm_long = (uaddr & PAGE_MASK) | 2;
474 access = write ? VM_WRITE : VM_READ; 463 access = write ? VM_WRITE : VM_READ;
475 fault = do_exception(&regs, access); 464 fault = do_exception(&regs, access, uaddr | 2);
476 /* 465 if (unlikely(fault)) {
477 * Since the fault happened in kernel mode while performing a uaccess 466 if (fault & VM_FAULT_OOM)
478 * all we need to do now is emulating a fixup in case "fault" is not 467 return -EFAULT;
479 * zero. 468 else if (fault & VM_FAULT_SIGBUS)
480 * For the calling uaccess functions this results always in -EFAULT. 469 do_sigbus(&regs, pgm_int_code, uaddr);
481 */ 470 }
482 return fault ? -EFAULT : 0; 471 return fault ? -EFAULT : 0;
483} 472}
484 473
@@ -520,7 +509,7 @@ int pfault_init(void)
520 .reserved = __PF_RES_FIELD }; 509 .reserved = __PF_RES_FIELD };
521 int rc; 510 int rc;
522 511
523 if (pfault_disable) 512 if (!MACHINE_IS_VM || pfault_disable)
524 return -1; 513 return -1;
525 asm volatile( 514 asm volatile(
526 " diag %1,%0,0x258\n" 515 " diag %1,%0,0x258\n"
@@ -541,7 +530,7 @@ void pfault_fini(void)
541 .refversn = 2, 530 .refversn = 2,
542 }; 531 };
543 532
544 if (pfault_disable) 533 if (!MACHINE_IS_VM || pfault_disable)
545 return; 534 return;
546 asm volatile( 535 asm volatile(
547 " diag %0,0,0x258\n" 536 " diag %0,0,0x258\n"
@@ -553,7 +542,7 @@ void pfault_fini(void)
553static DEFINE_SPINLOCK(pfault_lock); 542static DEFINE_SPINLOCK(pfault_lock);
554static LIST_HEAD(pfault_list); 543static LIST_HEAD(pfault_list);
555 544
556static void pfault_interrupt(struct ext_code ext_code, 545static void pfault_interrupt(unsigned int ext_int_code,
557 unsigned int param32, unsigned long param64) 546 unsigned int param32, unsigned long param64)
558{ 547{
559 struct task_struct *tsk; 548 struct task_struct *tsk;
@@ -566,19 +555,23 @@ static void pfault_interrupt(struct ext_code ext_code,
566 * in the 'cpu address' field associated with the 555 * in the 'cpu address' field associated with the
567 * external interrupt. 556 * external interrupt.
568 */ 557 */
569 subcode = ext_code.subcode; 558 subcode = ext_int_code >> 16;
570 if ((subcode & 0xff00) != __SUBCODE_MASK) 559 if ((subcode & 0xff00) != __SUBCODE_MASK)
571 return; 560 return;
572 inc_irq_stat(IRQEXT_PFL); 561 kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
573 /* Get the token (= pid of the affected task). */ 562 if (subcode & 0x0080) {
574 pid = sizeof(void *) == 4 ? param32 : param64; 563 /* Get the token (= pid of the affected task). */
575 rcu_read_lock(); 564 pid = sizeof(void *) == 4 ? param32 : param64;
576 tsk = find_task_by_pid_ns(pid, &init_pid_ns); 565 rcu_read_lock();
577 if (tsk) 566 tsk = find_task_by_pid_ns(pid, &init_pid_ns);
578 get_task_struct(tsk); 567 if (tsk)
579 rcu_read_unlock(); 568 get_task_struct(tsk);
580 if (!tsk) 569 rcu_read_unlock();
581 return; 570 if (!tsk)
571 return;
572 } else {
573 tsk = current;
574 }
582 spin_lock(&pfault_lock); 575 spin_lock(&pfault_lock);
583 if (subcode & 0x0080) { 576 if (subcode & 0x0080) {
584 /* signal bit is set -> a page has been swapped in by VM */ 577 /* signal bit is set -> a page has been swapped in by VM */
@@ -591,47 +584,30 @@ static void pfault_interrupt(struct ext_code ext_code,
591 tsk->thread.pfault_wait = 0; 584 tsk->thread.pfault_wait = 0;
592 list_del(&tsk->thread.list); 585 list_del(&tsk->thread.list);
593 wake_up_process(tsk); 586 wake_up_process(tsk);
594 put_task_struct(tsk);
595 } else { 587 } else {
596 /* Completion interrupt was faster than initial 588 /* Completion interrupt was faster than initial
597 * interrupt. Set pfault_wait to -1 so the initial 589 * interrupt. Set pfault_wait to -1 so the initial
598 * interrupt doesn't put the task to sleep. 590 * interrupt doesn't put the task to sleep. */
599 * If the task is not running, ignore the completion 591 tsk->thread.pfault_wait = -1;
600 * interrupt since it must be a leftover of a PFAULT
601 * CANCEL operation which didn't remove all pending
602 * completion interrupts. */
603 if (tsk->state == TASK_RUNNING)
604 tsk->thread.pfault_wait = -1;
605 } 592 }
593 put_task_struct(tsk);
606 } else { 594 } else {
607 /* signal bit not set -> a real page is missing. */ 595 /* signal bit not set -> a real page is missing. */
608 if (WARN_ON_ONCE(tsk != current)) 596 if (tsk->thread.pfault_wait == -1) {
609 goto out;
610 if (tsk->thread.pfault_wait == 1) {
611 /* Already on the list with a reference: put to sleep */
612 __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
613 set_tsk_need_resched(tsk);
614 } else if (tsk->thread.pfault_wait == -1) {
615 /* Completion interrupt was faster than the initial 597 /* Completion interrupt was faster than the initial
616 * interrupt (pfault_wait == -1). Set pfault_wait 598 * interrupt (pfault_wait == -1). Set pfault_wait
617 * back to zero and exit. */ 599 * back to zero and exit. */
618 tsk->thread.pfault_wait = 0; 600 tsk->thread.pfault_wait = 0;
619 } else { 601 } else {
620 /* Initial interrupt arrived before completion 602 /* Initial interrupt arrived before completion
621 * interrupt. Let the task sleep. 603 * interrupt. Let the task sleep. */
622 * An extra task reference is needed since a different
623 * cpu may set the task state to TASK_RUNNING again
624 * before the scheduler is reached. */
625 get_task_struct(tsk);
626 tsk->thread.pfault_wait = 1; 604 tsk->thread.pfault_wait = 1;
627 list_add(&tsk->thread.list, &pfault_list); 605 list_add(&tsk->thread.list, &pfault_list);
628 __set_task_state(tsk, TASK_UNINTERRUPTIBLE); 606 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
629 set_tsk_need_resched(tsk); 607 set_tsk_need_resched(tsk);
630 } 608 }
631 } 609 }
632out:
633 spin_unlock(&pfault_lock); 610 spin_unlock(&pfault_lock);
634 put_task_struct(tsk);
635} 611}
636 612
637static int __cpuinit pfault_cpu_notify(struct notifier_block *self, 613static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
@@ -640,15 +616,15 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
640 struct thread_struct *thread, *next; 616 struct thread_struct *thread, *next;
641 struct task_struct *tsk; 617 struct task_struct *tsk;
642 618
643 switch (action & ~CPU_TASKS_FROZEN) { 619 switch (action) {
644 case CPU_DEAD: 620 case CPU_DEAD:
621 case CPU_DEAD_FROZEN:
645 spin_lock_irq(&pfault_lock); 622 spin_lock_irq(&pfault_lock);
646 list_for_each_entry_safe(thread, next, &pfault_list, list) { 623 list_for_each_entry_safe(thread, next, &pfault_list, list) {
647 thread->pfault_wait = 0; 624 thread->pfault_wait = 0;
648 list_del(&thread->list); 625 list_del(&thread->list);
649 tsk = container_of(thread, struct task_struct, thread); 626 tsk = container_of(thread, struct task_struct, thread);
650 wake_up_process(tsk); 627 wake_up_process(tsk);
651 put_task_struct(tsk);
652 } 628 }
653 spin_unlock_irq(&pfault_lock); 629 spin_unlock_irq(&pfault_lock);
654 break; 630 break;
@@ -662,6 +638,8 @@ static int __init pfault_irq_init(void)
662{ 638{
663 int rc; 639 int rc;
664 640
641 if (!MACHINE_IS_VM)
642 return 0;
665 rc = register_external_interrupt(0x2603, pfault_interrupt); 643 rc = register_external_interrupt(0x2603, pfault_interrupt);
666 if (rc) 644 if (rc)
667 goto out_extint; 645 goto out_extint;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1f5315d1215..65cb06e2af4 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -115,18 +115,9 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
115 pmd = *pmdp; 115 pmd = *pmdp;
116 barrier(); 116 barrier();
117 next = pmd_addr_end(addr, end); 117 next = pmd_addr_end(addr, end);
118 /* 118 if (pmd_none(pmd))
119 * The pmd_trans_splitting() check below explains why
120 * pmdp_splitting_flush() has to serialize with
121 * smp_call_function() against our disabled IRQs, to stop
122 * this gup-fast code from running while we set the
123 * splitting bit in the pmd. Returning zero will take
124 * the slow path that will call wait_split_huge_page()
125 * if the pmd is still in splitting state.
126 */
127 if (pmd_none(pmd) || pmd_trans_splitting(pmd))
128 return 0; 119 return 0;
129 if (unlikely(pmd_large(pmd))) { 120 if (unlikely(pmd_huge(pmd))) {
130 if (!gup_huge_pmd(pmdp, pmd, addr, next, 121 if (!gup_huge_pmd(pmdp, pmd, addr, next,
131 write, pages, nr)) 122 write, pages, nr))
132 return 0; 123 return 0;
@@ -163,42 +154,6 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
163 return 1; 154 return 1;
164} 155}
165 156
166/*
167 * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
168 * back to the regular GUP.
169 */
170int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
171 struct page **pages)
172{
173 struct mm_struct *mm = current->mm;
174 unsigned long addr, len, end;
175 unsigned long next, flags;
176 pgd_t *pgdp, pgd;
177 int nr = 0;
178
179 start &= PAGE_MASK;
180 addr = start;
181 len = (unsigned long) nr_pages << PAGE_SHIFT;
182 end = start + len;
183 if ((end < start) || (end > TASK_SIZE))
184 return 0;
185
186 local_irq_save(flags);
187 pgdp = pgd_offset(mm, addr);
188 do {
189 pgd = *pgdp;
190 barrier();
191 next = pgd_addr_end(addr, end);
192 if (pgd_none(pgd))
193 break;
194 if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
195 break;
196 } while (pgdp++, addr = next, addr != end);
197 local_irq_restore(flags);
198
199 return nr;
200}
201
202/** 157/**
203 * get_user_pages_fast() - pin user pages in memory 158 * get_user_pages_fast() - pin user pages in memory
204 * @start: starting user address 159 * @start: starting user address
@@ -228,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
228 addr = start; 183 addr = start;
229 len = (unsigned long) nr_pages << PAGE_SHIFT; 184 len = (unsigned long) nr_pages << PAGE_SHIFT;
230 end = start + len; 185 end = start + len;
231 if ((end < start) || (end > TASK_SIZE)) 186 if (end < start)
232 goto slow_irqon; 187 goto slow_irqon;
233 188
234 /* 189 /*
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 532525ec88c..597bb2d27c3 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * IBM System z Huge TLB Page Support for Kernel. 2 * IBM System z Huge TLB Page Support for Kernel.
3 * 3 *
4 * Copyright IBM Corp. 2007 4 * Copyright 2007 IBM Corp.
5 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> 5 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
6 */ 6 */
7 7
@@ -58,8 +58,6 @@ void arch_release_hugepage(struct page *page)
58 ptep = (pte_t *) page[1].index; 58 ptep = (pte_t *) page[1].index;
59 if (!ptep) 59 if (!ptep)
60 return; 60 return;
61 clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
62 PTRS_PER_PTE * sizeof(pte_t));
63 page_table_free(&init_mm, (unsigned long *) ptep); 61 page_table_free(&init_mm, (unsigned long *) ptep);
64 page[1].index = 0; 62 page[1].index = 0;
65} 63}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ae672f41c46..59b663109d9 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -1,6 +1,8 @@
1/* 1/*
2 * arch/s390/mm/init.c
3 *
2 * S390 version 4 * S390 version
3 * Copyright IBM Corp. 1999 5 * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
4 * Author(s): Hartmut Penner (hp@de.ibm.com) 6 * Author(s): Hartmut Penner (hp@de.ibm.com)
5 * 7 *
6 * Derived from "arch/i386/mm/init.c" 8 * Derived from "arch/i386/mm/init.c"
@@ -24,9 +26,9 @@
24#include <linux/pfn.h> 26#include <linux/pfn.h>
25#include <linux/poison.h> 27#include <linux/poison.h>
26#include <linux/initrd.h> 28#include <linux/initrd.h>
27#include <linux/export.h>
28#include <linux/gfp.h> 29#include <linux/gfp.h>
29#include <asm/processor.h> 30#include <asm/processor.h>
31#include <asm/system.h>
30#include <asm/uaccess.h> 32#include <asm/uaccess.h>
31#include <asm/pgtable.h> 33#include <asm/pgtable.h>
32#include <asm/pgalloc.h> 34#include <asm/pgalloc.h>
@@ -35,14 +37,13 @@
35#include <asm/tlb.h> 37#include <asm/tlb.h>
36#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
37#include <asm/sections.h> 39#include <asm/sections.h>
38#include <asm/ctl_reg.h>
39 40
40pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); 41pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
41 42
42unsigned long empty_zero_page, zero_page_mask; 43unsigned long empty_zero_page, zero_page_mask;
43EXPORT_SYMBOL(empty_zero_page); 44EXPORT_SYMBOL(empty_zero_page);
44 45
45static unsigned long __init setup_zero_pages(void) 46static unsigned long setup_zero_pages(void)
46{ 47{
47 struct cpuid cpu_id; 48 struct cpuid cpu_id;
48 unsigned int order; 49 unsigned int order;
@@ -91,22 +92,18 @@ static unsigned long __init setup_zero_pages(void)
91void __init paging_init(void) 92void __init paging_init(void)
92{ 93{
93 unsigned long max_zone_pfns[MAX_NR_ZONES]; 94 unsigned long max_zone_pfns[MAX_NR_ZONES];
94 unsigned long pgd_type, asce_bits; 95 unsigned long pgd_type;
95 96
96 init_mm.pgd = swapper_pg_dir; 97 init_mm.pgd = swapper_pg_dir;
98 S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
97#ifdef CONFIG_64BIT 99#ifdef CONFIG_64BIT
98 if (VMALLOC_END > (1UL << 42)) { 100 /* A three level page table (4TB) is enough for the kernel space. */
99 asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; 101 S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
100 pgd_type = _REGION2_ENTRY_EMPTY; 102 pgd_type = _REGION3_ENTRY_EMPTY;
101 } else {
102 asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
103 pgd_type = _REGION3_ENTRY_EMPTY;
104 }
105#else 103#else
106 asce_bits = _ASCE_TABLE_LENGTH; 104 S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH;
107 pgd_type = _SEGMENT_ENTRY_EMPTY; 105 pgd_type = _SEGMENT_ENTRY_EMPTY;
108#endif 106#endif
109 S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
110 clear_table((unsigned long *) init_mm.pgd, pgd_type, 107 clear_table((unsigned long *) init_mm.pgd, pgd_type,
111 sizeof(unsigned long)*2048); 108 sizeof(unsigned long)*2048);
112 vmem_map_init(); 109 vmem_map_init();
@@ -125,6 +122,7 @@ void __init paging_init(void)
125 max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); 122 max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
126 max_zone_pfns[ZONE_NORMAL] = max_low_pfn; 123 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
127 free_area_init_nodes(max_zone_pfns); 124 free_area_init_nodes(max_zone_pfns);
125 fault_init();
128} 126}
129 127
130void __init mem_init(void) 128void __init mem_init(void)
@@ -158,6 +156,34 @@ void __init mem_init(void)
158 PFN_ALIGN((unsigned long)&_eshared) - 1); 156 PFN_ALIGN((unsigned long)&_eshared) - 1);
159} 157}
160 158
159#ifdef CONFIG_DEBUG_PAGEALLOC
160void kernel_map_pages(struct page *page, int numpages, int enable)
161{
162 pgd_t *pgd;
163 pud_t *pud;
164 pmd_t *pmd;
165 pte_t *pte;
166 unsigned long address;
167 int i;
168
169 for (i = 0; i < numpages; i++) {
170 address = page_to_phys(page + i);
171 pgd = pgd_offset_k(address);
172 pud = pud_offset(pgd, address);
173 pmd = pmd_offset(pud, address);
174 pte = pte_offset_kernel(pmd, address);
175 if (!enable) {
176 __ptep_ipte(address, pte);
177 pte_val(*pte) = _PAGE_TYPE_EMPTY;
178 continue;
179 }
180 *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
181 /* Flush cpu write queue. */
182 mb();
183 }
184}
185#endif
186
161void free_init_pages(char *what, unsigned long begin, unsigned long end) 187void free_init_pages(char *what, unsigned long begin, unsigned long end)
162{ 188{
163 unsigned long addr = begin; 189 unsigned long addr = begin;
@@ -183,7 +209,7 @@ void free_initmem(void)
183} 209}
184 210
185#ifdef CONFIG_BLK_DEV_INITRD 211#ifdef CONFIG_BLK_DEV_INITRD
186void __init free_initrd_mem(unsigned long start, unsigned long end) 212void free_initrd_mem(unsigned long start, unsigned long end)
187{ 213{
188 free_init_pages("initrd memory", start, end); 214 free_init_pages("initrd memory", start, end);
189} 215}
@@ -192,38 +218,16 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
192#ifdef CONFIG_MEMORY_HOTPLUG 218#ifdef CONFIG_MEMORY_HOTPLUG
193int arch_add_memory(int nid, u64 start, u64 size) 219int arch_add_memory(int nid, u64 start, u64 size)
194{ 220{
195 unsigned long zone_start_pfn, zone_end_pfn, nr_pages; 221 struct pglist_data *pgdat;
196 unsigned long start_pfn = PFN_DOWN(start);
197 unsigned long size_pages = PFN_DOWN(size);
198 struct zone *zone; 222 struct zone *zone;
199 int rc; 223 int rc;
200 224
225 pgdat = NODE_DATA(nid);
226 zone = pgdat->node_zones + ZONE_MOVABLE;
201 rc = vmem_add_mapping(start, size); 227 rc = vmem_add_mapping(start, size);
202 if (rc) 228 if (rc)
203 return rc; 229 return rc;
204 for_each_zone(zone) { 230 rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size));
205 if (zone_idx(zone) != ZONE_MOVABLE) {
206 /* Add range within existing zone limits */
207 zone_start_pfn = zone->zone_start_pfn;
208 zone_end_pfn = zone->zone_start_pfn +
209 zone->spanned_pages;
210 } else {
211 /* Add remaining range to ZONE_MOVABLE */
212 zone_start_pfn = start_pfn;
213 zone_end_pfn = start_pfn + size_pages;
214 }
215 if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
216 continue;
217 nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
218 zone_end_pfn - start_pfn : size_pages;
219 rc = __add_pages(nid, zone, start_pfn, nr_pages);
220 if (rc)
221 break;
222 start_pfn += nr_pages;
223 size_pages -= nr_pages;
224 if (!size_pages)
225 break;
226 }
227 if (rc) 231 if (rc)
228 vmem_remove_mapping(start, size); 232 vmem_remove_mapping(start, size);
229 return rc; 233 return rc;
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 921fa541dc0..5dbbaa6e594 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -11,9 +11,7 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/gfp.h> 14#include <asm/system.h>
15#include <linux/cpu.h>
16#include <asm/ctl_reg.h>
17 15
18/* 16/*
19 * This function writes to kernel memory bypassing DAT and possible 17 * This function writes to kernel memory bypassing DAT and possible
@@ -62,14 +60,18 @@ long probe_kernel_write(void *dst, const void *src, size_t size)
62 return copied < 0 ? -EFAULT : 0; 60 return copied < 0 ? -EFAULT : 0;
63} 61}
64 62
65static int __memcpy_real(void *dest, void *src, size_t count) 63int memcpy_real(void *dest, void *src, size_t count)
66{ 64{
67 register unsigned long _dest asm("2") = (unsigned long) dest; 65 register unsigned long _dest asm("2") = (unsigned long) dest;
68 register unsigned long _len1 asm("3") = (unsigned long) count; 66 register unsigned long _len1 asm("3") = (unsigned long) count;
69 register unsigned long _src asm("4") = (unsigned long) src; 67 register unsigned long _src asm("4") = (unsigned long) src;
70 register unsigned long _len2 asm("5") = (unsigned long) count; 68 register unsigned long _len2 asm("5") = (unsigned long) count;
69 unsigned long flags;
71 int rc = -EFAULT; 70 int rc = -EFAULT;
72 71
72 if (!count)
73 return 0;
74 flags = __arch_local_irq_stnsm(0xf8UL);
73 asm volatile ( 75 asm volatile (
74 "0: mvcle %1,%2,0x0\n" 76 "0: mvcle %1,%2,0x0\n"
75 "1: jo 0b\n" 77 "1: jo 0b\n"
@@ -80,150 +82,22 @@ static int __memcpy_real(void *dest, void *src, size_t count)
80 "+d" (_len2), "=m" (*((long *) dest)) 82 "+d" (_len2), "=m" (*((long *) dest))
81 : "m" (*((long *) src)) 83 : "m" (*((long *) src))
82 : "cc", "memory"); 84 : "cc", "memory");
85 arch_local_irq_restore(flags);
83 return rc; 86 return rc;
84} 87}
85 88
86/* 89/*
87 * Copy memory in real mode (kernel to kernel) 90 * Copy memory to absolute zero
88 */
89int memcpy_real(void *dest, void *src, size_t count)
90{
91 unsigned long flags;
92 int rc;
93
94 if (!count)
95 return 0;
96 local_irq_save(flags);
97 __arch_local_irq_stnsm(0xfbUL);
98 rc = __memcpy_real(dest, src, count);
99 local_irq_restore(flags);
100 return rc;
101}
102
103/*
104 * Copy memory in absolute mode (kernel to kernel)
105 */ 91 */
106void memcpy_absolute(void *dest, void *src, size_t count) 92void copy_to_absolute_zero(void *dest, void *src, size_t count)
107{ 93{
108 unsigned long cr0, flags, prefix; 94 unsigned long cr0;
109 95
110 flags = arch_local_irq_save(); 96 BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore));
97 preempt_disable();
111 __ctl_store(cr0, 0, 0); 98 __ctl_store(cr0, 0, 0);
112 __ctl_clear_bit(0, 28); /* disable lowcore protection */ 99 __ctl_clear_bit(0, 28); /* disable lowcore protection */
113 prefix = store_prefix(); 100 memcpy_real(dest + store_prefix(), src, count);
114 if (prefix) {
115 local_mcck_disable();
116 set_prefix(0);
117 memcpy(dest, src, count);
118 set_prefix(prefix);
119 local_mcck_enable();
120 } else {
121 memcpy(dest, src, count);
122 }
123 __ctl_load(cr0, 0, 0); 101 __ctl_load(cr0, 0, 0);
124 arch_local_irq_restore(flags);
125}
126
127/*
128 * Copy memory from kernel (real) to user (virtual)
129 */
130int copy_to_user_real(void __user *dest, void *src, size_t count)
131{
132 int offs = 0, size, rc;
133 char *buf;
134
135 buf = (char *) __get_free_page(GFP_KERNEL);
136 if (!buf)
137 return -ENOMEM;
138 rc = -EFAULT;
139 while (offs < count) {
140 size = min(PAGE_SIZE, count - offs);
141 if (memcpy_real(buf, src + offs, size))
142 goto out;
143 if (copy_to_user(dest + offs, buf, size))
144 goto out;
145 offs += size;
146 }
147 rc = 0;
148out:
149 free_page((unsigned long) buf);
150 return rc;
151}
152
153/*
154 * Copy memory from user (virtual) to kernel (real)
155 */
156int copy_from_user_real(void *dest, void __user *src, size_t count)
157{
158 int offs = 0, size, rc;
159 char *buf;
160
161 buf = (char *) __get_free_page(GFP_KERNEL);
162 if (!buf)
163 return -ENOMEM;
164 rc = -EFAULT;
165 while (offs < count) {
166 size = min(PAGE_SIZE, count - offs);
167 if (copy_from_user(buf, src + offs, size))
168 goto out;
169 if (memcpy_real(dest + offs, buf, size))
170 goto out;
171 offs += size;
172 }
173 rc = 0;
174out:
175 free_page((unsigned long) buf);
176 return rc;
177}
178
179/*
180 * Check if physical address is within prefix or zero page
181 */
182static int is_swapped(unsigned long addr)
183{
184 unsigned long lc;
185 int cpu;
186
187 if (addr < sizeof(struct _lowcore))
188 return 1;
189 for_each_online_cpu(cpu) {
190 lc = (unsigned long) lowcore_ptr[cpu];
191 if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc)
192 continue;
193 return 1;
194 }
195 return 0;
196}
197
198/*
199 * Convert a physical pointer for /dev/mem access
200 *
201 * For swapped prefix pages a new buffer is returned that contains a copy of
202 * the absolute memory. The buffer size is maximum one page large.
203 */
204void *xlate_dev_mem_ptr(unsigned long addr)
205{
206 void *bounce = (void *) addr;
207 unsigned long size;
208
209 get_online_cpus();
210 preempt_disable();
211 if (is_swapped(addr)) {
212 size = PAGE_SIZE - (addr & ~PAGE_MASK);
213 bounce = (void *) __get_free_page(GFP_ATOMIC);
214 if (bounce)
215 memcpy_absolute(bounce, (void *) addr, size);
216 }
217 preempt_enable(); 102 preempt_enable();
218 put_online_cpus();
219 return bounce;
220}
221
222/*
223 * Free converted buffer for /dev/mem access (if necessary)
224 */
225void unxlate_dev_mem_ptr(unsigned long addr, void *buf)
226{
227 if ((void *) addr != buf)
228 free_page((unsigned long) buf);
229} 103}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index c59a5efa58b..c9a9f7f1818 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -1,4 +1,6 @@
1/* 1/*
2 * linux/arch/s390/mm/mmap.c
3 *
2 * flexible mmap layout support 4 * flexible mmap layout support
3 * 5 *
4 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. 6 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
@@ -24,11 +26,10 @@
24 26
25#include <linux/personality.h> 27#include <linux/personality.h>
26#include <linux/mm.h> 28#include <linux/mm.h>
27#include <linux/mman.h>
28#include <linux/module.h> 29#include <linux/module.h>
29#include <linux/random.h> 30#include <linux/random.h>
30#include <linux/compat.h>
31#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
32#include <asm/compat.h>
32 33
33static unsigned long stack_maxrandom_size(void) 34static unsigned long stack_maxrandom_size(void)
34{ 35{
@@ -98,20 +99,15 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
98 mm->unmap_area = arch_unmap_area_topdown; 99 mm->unmap_area = arch_unmap_area_topdown;
99 } 100 }
100} 101}
102EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
101 103
102#else 104#else
103 105
104int s390_mmap_check(unsigned long addr, unsigned long len) 106int s390_mmap_check(unsigned long addr, unsigned long len)
105{ 107{
106 int rc;
107
108 if (!is_compat_task() && 108 if (!is_compat_task() &&
109 len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) { 109 len >= TASK_SIZE && TASK_SIZE < (1UL << 53))
110 rc = crst_table_upgrade(current->mm, 1UL << 53); 110 return crst_table_upgrade(current->mm, 1UL << 53);
111 if (rc)
112 return rc;
113 update_mm(current->mm, current);
114 }
115 return 0; 111 return 0;
116} 112}
117 113
@@ -131,7 +127,6 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
131 rc = crst_table_upgrade(mm, 1UL << 53); 127 rc = crst_table_upgrade(mm, 1UL << 53);
132 if (rc) 128 if (rc)
133 return (unsigned long) rc; 129 return (unsigned long) rc;
134 update_mm(mm, current);
135 area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); 130 area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
136 } 131 }
137 return area; 132 return area;
@@ -154,7 +149,6 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
154 rc = crst_table_upgrade(mm, 1UL << 53); 149 rc = crst_table_upgrade(mm, 1UL << 53);
155 if (rc) 150 if (rc)
156 return (unsigned long) rc; 151 return (unsigned long) rc;
157 update_mm(mm, current);
158 area = arch_get_unmapped_area_topdown(filp, addr, len, 152 area = arch_get_unmapped_area_topdown(filp, addr, len,
159 pgoff, flags); 153 pgoff, flags);
160 } 154 }
@@ -180,5 +174,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
180 mm->unmap_area = arch_unmap_area_topdown; 174 mm->unmap_area = arch_unmap_area_topdown;
181 } 175 }
182} 176}
177EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
183 178
184#endif 179#endif
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 29ccee3651f..d013ed39743 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -2,79 +2,30 @@
2 * Copyright IBM Corp. 2011 2 * Copyright IBM Corp. 2011
3 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> 3 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
4 */ 4 */
5#include <linux/hugetlb.h>
6#include <linux/module.h> 5#include <linux/module.h>
7#include <linux/mm.h> 6#include <linux/mm.h>
8#include <asm/cacheflush.h> 7#include <linux/hugetlb.h>
9#include <asm/pgtable.h> 8#include <asm/pgtable.h>
10#include <asm/page.h>
11
12void storage_key_init_range(unsigned long start, unsigned long end)
13{
14 unsigned long boundary, function, size;
15
16 while (start < end) {
17 if (MACHINE_HAS_EDAT2) {
18 /* set storage keys for a 2GB frame */
19 function = 0x22000 | PAGE_DEFAULT_KEY;
20 size = 1UL << 31;
21 boundary = (start + size) & ~(size - 1);
22 if (boundary <= end) {
23 do {
24 start = pfmf(function, start);
25 } while (start < boundary);
26 continue;
27 }
28 }
29 if (MACHINE_HAS_EDAT1) {
30 /* set storage keys for a 1MB frame */
31 function = 0x21000 | PAGE_DEFAULT_KEY;
32 size = 1UL << 20;
33 boundary = (start + size) & ~(size - 1);
34 if (boundary <= end) {
35 do {
36 start = pfmf(function, start);
37 } while (start < boundary);
38 continue;
39 }
40 }
41 page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
42 start += PAGE_SIZE;
43 }
44}
45
46static pte_t *walk_page_table(unsigned long addr)
47{
48 pgd_t *pgdp;
49 pud_t *pudp;
50 pmd_t *pmdp;
51 pte_t *ptep;
52
53 pgdp = pgd_offset_k(addr);
54 if (pgd_none(*pgdp))
55 return NULL;
56 pudp = pud_offset(pgdp, addr);
57 if (pud_none(*pudp) || pud_large(*pudp))
58 return NULL;
59 pmdp = pmd_offset(pudp, addr);
60 if (pmd_none(*pmdp) || pmd_large(*pmdp))
61 return NULL;
62 ptep = pte_offset_kernel(pmdp, addr);
63 if (pte_none(*ptep))
64 return NULL;
65 return ptep;
66}
67 9
68static void change_page_attr(unsigned long addr, int numpages, 10static void change_page_attr(unsigned long addr, int numpages,
69 pte_t (*set) (pte_t)) 11 pte_t (*set) (pte_t))
70{ 12{
71 pte_t *ptep, pte; 13 pte_t *ptep, pte;
14 pmd_t *pmdp;
15 pud_t *pudp;
16 pgd_t *pgdp;
72 int i; 17 int i;
73 18
74 for (i = 0; i < numpages; i++) { 19 for (i = 0; i < numpages; i++) {
75 ptep = walk_page_table(addr); 20 pgdp = pgd_offset(&init_mm, addr);
76 if (WARN_ON_ONCE(!ptep)) 21 pudp = pud_offset(pgdp, addr);
77 break; 22 pmdp = pmd_offset(pudp, addr);
23 if (pmd_huge(*pmdp)) {
24 WARN_ON_ONCE(1);
25 continue;
26 }
27 ptep = pte_offset_kernel(pmdp, addr);
28
78 pte = *ptep; 29 pte = *ptep;
79 pte = set(pte); 30 pte = set(pte);
80 __ptep_ipte(addr, ptep); 31 __ptep_ipte(addr, ptep);
@@ -88,63 +39,23 @@ int set_memory_ro(unsigned long addr, int numpages)
88 change_page_attr(addr, numpages, pte_wrprotect); 39 change_page_attr(addr, numpages, pte_wrprotect);
89 return 0; 40 return 0;
90} 41}
42EXPORT_SYMBOL_GPL(set_memory_ro);
91 43
92int set_memory_rw(unsigned long addr, int numpages) 44int set_memory_rw(unsigned long addr, int numpages)
93{ 45{
94 change_page_attr(addr, numpages, pte_mkwrite); 46 change_page_attr(addr, numpages, pte_mkwrite);
95 return 0; 47 return 0;
96} 48}
49EXPORT_SYMBOL_GPL(set_memory_rw);
97 50
98/* not possible */ 51/* not possible */
99int set_memory_nx(unsigned long addr, int numpages) 52int set_memory_nx(unsigned long addr, int numpages)
100{ 53{
101 return 0; 54 return 0;
102} 55}
56EXPORT_SYMBOL_GPL(set_memory_nx);
103 57
104int set_memory_x(unsigned long addr, int numpages) 58int set_memory_x(unsigned long addr, int numpages)
105{ 59{
106 return 0; 60 return 0;
107} 61}
108
109#ifdef CONFIG_DEBUG_PAGEALLOC
110void kernel_map_pages(struct page *page, int numpages, int enable)
111{
112 unsigned long address;
113 pgd_t *pgd;
114 pud_t *pud;
115 pmd_t *pmd;
116 pte_t *pte;
117 int i;
118
119 for (i = 0; i < numpages; i++) {
120 address = page_to_phys(page + i);
121 pgd = pgd_offset_k(address);
122 pud = pud_offset(pgd, address);
123 pmd = pmd_offset(pud, address);
124 pte = pte_offset_kernel(pmd, address);
125 if (!enable) {
126 __ptep_ipte(address, pte);
127 pte_val(*pte) = _PAGE_TYPE_EMPTY;
128 continue;
129 }
130 *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
131 }
132}
133
134#ifdef CONFIG_HIBERNATION
135bool kernel_page_present(struct page *page)
136{
137 unsigned long addr;
138 int cc;
139
140 addr = page_to_phys(page);
141 asm volatile(
142 " lra %1,0(%1)\n"
143 " ipm %0\n"
144 " srl %0,28"
145 : "=d" (cc), "+a" (addr) : : "cc");
146 return cc == 0;
147}
148#endif /* CONFIG_HIBERNATION */
149
150#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ae44d2a3431..529a0883837 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright IBM Corp. 2007, 2011 2 * Copyright IBM Corp. 2007,2009
3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
4 */ 4 */
5 5
@@ -18,6 +18,7 @@
18#include <linux/rcupdate.h> 18#include <linux/rcupdate.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20 20
21#include <asm/system.h>
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
22#include <asm/pgalloc.h> 23#include <asm/pgalloc.h>
23#include <asm/tlb.h> 24#include <asm/tlb.h>
@@ -32,6 +33,17 @@
32#define FRAG_MASK 0x03 33#define FRAG_MASK 0x03
33#endif 34#endif
34 35
36unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
37EXPORT_SYMBOL(VMALLOC_START);
38
39static int __init parse_vmalloc(char *arg)
40{
41 if (!arg)
42 return -EINVAL;
43 VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
44 return 0;
45}
46early_param("vmalloc", parse_vmalloc);
35 47
36unsigned long *crst_table_alloc(struct mm_struct *mm) 48unsigned long *crst_table_alloc(struct mm_struct *mm)
37{ 49{
@@ -85,6 +97,7 @@ repeat:
85 crst_table_free(mm, table); 97 crst_table_free(mm, table);
86 if (mm->context.asce_limit < limit) 98 if (mm->context.asce_limit < limit)
87 goto repeat; 99 goto repeat;
100 update_mm(mm, current);
88 return 0; 101 return 0;
89} 102}
90 103
@@ -92,6 +105,9 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
92{ 105{
93 pgd_t *pgd; 106 pgd_t *pgd;
94 107
108 if (mm->context.asce_limit <= limit)
109 return;
110 __tlb_flush_mm(mm);
95 while (mm->context.asce_limit > limit) { 111 while (mm->context.asce_limit > limit) {
96 pgd = mm->pgd; 112 pgd = mm->pgd;
97 switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { 113 switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
@@ -114,6 +130,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
114 mm->task_size = mm->context.asce_limit; 130 mm->task_size = mm->context.asce_limit;
115 crst_table_free(mm, (unsigned long *) pgd); 131 crst_table_free(mm, (unsigned long *) pgd);
116 } 132 }
133 update_mm(mm, current);
117} 134}
118#endif 135#endif
119 136
@@ -205,7 +222,6 @@ void gmap_free(struct gmap *gmap)
205 222
206 /* Free all segment & region tables. */ 223 /* Free all segment & region tables. */
207 down_read(&gmap->mm->mmap_sem); 224 down_read(&gmap->mm->mmap_sem);
208 spin_lock(&gmap->mm->page_table_lock);
209 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { 225 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
210 table = (unsigned long *) page_to_phys(page); 226 table = (unsigned long *) page_to_phys(page);
211 if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) 227 if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
@@ -214,7 +230,6 @@ void gmap_free(struct gmap *gmap)
214 gmap_unlink_segment(gmap, table); 230 gmap_unlink_segment(gmap, table);
215 __free_pages(page, ALLOC_ORDER); 231 __free_pages(page, ALLOC_ORDER);
216 } 232 }
217 spin_unlock(&gmap->mm->page_table_lock);
218 up_read(&gmap->mm->mmap_sem); 233 up_read(&gmap->mm->mmap_sem);
219 list_del(&gmap->list); 234 list_del(&gmap->list);
220 kfree(gmap); 235 kfree(gmap);
@@ -241,29 +256,25 @@ void gmap_disable(struct gmap *gmap)
241} 256}
242EXPORT_SYMBOL_GPL(gmap_disable); 257EXPORT_SYMBOL_GPL(gmap_disable);
243 258
244/*
245 * gmap_alloc_table is assumed to be called with mmap_sem held
246 */
247static int gmap_alloc_table(struct gmap *gmap, 259static int gmap_alloc_table(struct gmap *gmap,
248 unsigned long *table, unsigned long init) 260 unsigned long *table, unsigned long init)
249{ 261{
250 struct page *page; 262 struct page *page;
251 unsigned long *new; 263 unsigned long *new;
252 264
253 /* since we dont free the gmap table until gmap_free we can unlock */
254 spin_unlock(&gmap->mm->page_table_lock);
255 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 265 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
256 spin_lock(&gmap->mm->page_table_lock);
257 if (!page) 266 if (!page)
258 return -ENOMEM; 267 return -ENOMEM;
259 new = (unsigned long *) page_to_phys(page); 268 new = (unsigned long *) page_to_phys(page);
260 crst_table_init(new, init); 269 crst_table_init(new, init);
270 down_read(&gmap->mm->mmap_sem);
261 if (*table & _REGION_ENTRY_INV) { 271 if (*table & _REGION_ENTRY_INV) {
262 list_add(&page->lru, &gmap->crst_list); 272 list_add(&page->lru, &gmap->crst_list);
263 *table = (unsigned long) new | _REGION_ENTRY_LENGTH | 273 *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
264 (*table & _REGION_ENTRY_TYPE_MASK); 274 (*table & _REGION_ENTRY_TYPE_MASK);
265 } else 275 } else
266 __free_pages(page, ALLOC_ORDER); 276 __free_pages(page, ALLOC_ORDER);
277 up_read(&gmap->mm->mmap_sem);
267 return 0; 278 return 0;
268} 279}
269 280
@@ -288,7 +299,6 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
288 299
289 flush = 0; 300 flush = 0;
290 down_read(&gmap->mm->mmap_sem); 301 down_read(&gmap->mm->mmap_sem);
291 spin_lock(&gmap->mm->page_table_lock);
292 for (off = 0; off < len; off += PMD_SIZE) { 302 for (off = 0; off < len; off += PMD_SIZE) {
293 /* Walk the guest addr space page table */ 303 /* Walk the guest addr space page table */
294 table = gmap->table + (((to + off) >> 53) & 0x7ff); 304 table = gmap->table + (((to + off) >> 53) & 0x7ff);
@@ -310,7 +320,6 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
310 *table = _SEGMENT_ENTRY_INV; 320 *table = _SEGMENT_ENTRY_INV;
311 } 321 }
312out: 322out:
313 spin_unlock(&gmap->mm->page_table_lock);
314 up_read(&gmap->mm->mmap_sem); 323 up_read(&gmap->mm->mmap_sem);
315 if (flush) 324 if (flush)
316 gmap_flush_tlb(gmap); 325 gmap_flush_tlb(gmap);
@@ -341,7 +350,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
341 350
342 flush = 0; 351 flush = 0;
343 down_read(&gmap->mm->mmap_sem); 352 down_read(&gmap->mm->mmap_sem);
344 spin_lock(&gmap->mm->page_table_lock);
345 for (off = 0; off < len; off += PMD_SIZE) { 353 for (off = 0; off < len; off += PMD_SIZE) {
346 /* Walk the gmap address space page table */ 354 /* Walk the gmap address space page table */
347 table = gmap->table + (((to + off) >> 53) & 0x7ff); 355 table = gmap->table + (((to + off) >> 53) & 0x7ff);
@@ -365,24 +373,19 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
365 flush |= gmap_unlink_segment(gmap, table); 373 flush |= gmap_unlink_segment(gmap, table);
366 *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); 374 *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
367 } 375 }
368 spin_unlock(&gmap->mm->page_table_lock);
369 up_read(&gmap->mm->mmap_sem); 376 up_read(&gmap->mm->mmap_sem);
370 if (flush) 377 if (flush)
371 gmap_flush_tlb(gmap); 378 gmap_flush_tlb(gmap);
372 return 0; 379 return 0;
373 380
374out_unmap: 381out_unmap:
375 spin_unlock(&gmap->mm->page_table_lock);
376 up_read(&gmap->mm->mmap_sem); 382 up_read(&gmap->mm->mmap_sem);
377 gmap_unmap_segment(gmap, to, len); 383 gmap_unmap_segment(gmap, to, len);
378 return -ENOMEM; 384 return -ENOMEM;
379} 385}
380EXPORT_SYMBOL_GPL(gmap_map_segment); 386EXPORT_SYMBOL_GPL(gmap_map_segment);
381 387
382/* 388unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
383 * this function is assumed to be called with mmap_sem held
384 */
385unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
386{ 389{
387 unsigned long *table, vmaddr, segment; 390 unsigned long *table, vmaddr, segment;
388 struct mm_struct *mm; 391 struct mm_struct *mm;
@@ -442,75 +445,16 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
442 page = pmd_page(*pmd); 445 page = pmd_page(*pmd);
443 mp = (struct gmap_pgtable *) page->index; 446 mp = (struct gmap_pgtable *) page->index;
444 rmap->entry = table; 447 rmap->entry = table;
445 spin_lock(&mm->page_table_lock);
446 list_add(&rmap->list, &mp->mapper); 448 list_add(&rmap->list, &mp->mapper);
447 spin_unlock(&mm->page_table_lock);
448 /* Set gmap segment table entry to page table. */ 449 /* Set gmap segment table entry to page table. */
449 *table = pmd_val(*pmd) & PAGE_MASK; 450 *table = pmd_val(*pmd) & PAGE_MASK;
450 return vmaddr | (address & ~PMD_MASK); 451 return vmaddr | (address & ~PMD_MASK);
451 } 452 }
452 return -EFAULT; 453 return -EFAULT;
453}
454 454
455unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
456{
457 unsigned long rc;
458
459 down_read(&gmap->mm->mmap_sem);
460 rc = __gmap_fault(address, gmap);
461 up_read(&gmap->mm->mmap_sem);
462
463 return rc;
464} 455}
465EXPORT_SYMBOL_GPL(gmap_fault); 456EXPORT_SYMBOL_GPL(gmap_fault);
466 457
467void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
468{
469
470 unsigned long *table, address, size;
471 struct vm_area_struct *vma;
472 struct gmap_pgtable *mp;
473 struct page *page;
474
475 down_read(&gmap->mm->mmap_sem);
476 address = from;
477 while (address < to) {
478 /* Walk the gmap address space page table */
479 table = gmap->table + ((address >> 53) & 0x7ff);
480 if (unlikely(*table & _REGION_ENTRY_INV)) {
481 address = (address + PMD_SIZE) & PMD_MASK;
482 continue;
483 }
484 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
485 table = table + ((address >> 42) & 0x7ff);
486 if (unlikely(*table & _REGION_ENTRY_INV)) {
487 address = (address + PMD_SIZE) & PMD_MASK;
488 continue;
489 }
490 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
491 table = table + ((address >> 31) & 0x7ff);
492 if (unlikely(*table & _REGION_ENTRY_INV)) {
493 address = (address + PMD_SIZE) & PMD_MASK;
494 continue;
495 }
496 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
497 table = table + ((address >> 20) & 0x7ff);
498 if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
499 address = (address + PMD_SIZE) & PMD_MASK;
500 continue;
501 }
502 page = pfn_to_page(*table >> PAGE_SHIFT);
503 mp = (struct gmap_pgtable *) page->index;
504 vma = find_vma(gmap->mm, mp->vmaddr);
505 size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
506 zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
507 size, NULL);
508 address = (address + PMD_SIZE) & PMD_MASK;
509 }
510 up_read(&gmap->mm->mmap_sem);
511}
512EXPORT_SYMBOL_GPL(gmap_discard);
513
514void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) 458void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
515{ 459{
516 struct gmap_rmap *rmap, *next; 460 struct gmap_rmap *rmap, *next;
@@ -568,7 +512,7 @@ static inline void page_table_free_pgste(unsigned long *table)
568 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 512 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
569 mp = (struct gmap_pgtable *) page->index; 513 mp = (struct gmap_pgtable *) page->index;
570 BUG_ON(!list_empty(&mp->mapper)); 514 BUG_ON(!list_empty(&mp->mapper));
571 pgtable_page_dtor(page); 515 pgtable_page_ctor(page);
572 atomic_set(&page->_mapcount, -1); 516 atomic_set(&page->_mapcount, -1);
573 kfree(mp); 517 kfree(mp);
574 __free_page(page); 518 __free_page(page);
@@ -609,8 +553,8 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
609 */ 553 */
610unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) 554unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
611{ 555{
612 unsigned long *uninitialized_var(table); 556 struct page *page;
613 struct page *uninitialized_var(page); 557 unsigned long *table;
614 unsigned int mask, bit; 558 unsigned int mask, bit;
615 559
616 if (mm_has_pgste(mm)) 560 if (mm_has_pgste(mm))
@@ -673,6 +617,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
673 } 617 }
674} 618}
675 619
620#ifdef CONFIG_HAVE_RCU_TABLE_FREE
621
676static void __page_table_free_rcu(void *table, unsigned bit) 622static void __page_table_free_rcu(void *table, unsigned bit)
677{ 623{
678 struct page *page; 624 struct page *page;
@@ -726,90 +672,7 @@ void __tlb_remove_table(void *_table)
726 free_pages((unsigned long) table, ALLOC_ORDER); 672 free_pages((unsigned long) table, ALLOC_ORDER);
727} 673}
728 674
729static void tlb_remove_table_smp_sync(void *arg) 675#endif
730{
731 /* Simply deliver the interrupt */
732}
733
734static void tlb_remove_table_one(void *table)
735{
736 /*
737 * This isn't an RCU grace period and hence the page-tables cannot be
738 * assumed to be actually RCU-freed.
739 *
740 * It is however sufficient for software page-table walkers that rely
741 * on IRQ disabling. See the comment near struct mmu_table_batch.
742 */
743 smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
744 __tlb_remove_table(table);
745}
746
747static void tlb_remove_table_rcu(struct rcu_head *head)
748{
749 struct mmu_table_batch *batch;
750 int i;
751
752 batch = container_of(head, struct mmu_table_batch, rcu);
753
754 for (i = 0; i < batch->nr; i++)
755 __tlb_remove_table(batch->tables[i]);
756
757 free_page((unsigned long)batch);
758}
759
760void tlb_table_flush(struct mmu_gather *tlb)
761{
762 struct mmu_table_batch **batch = &tlb->batch;
763
764 if (*batch) {
765 __tlb_flush_mm(tlb->mm);
766 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
767 *batch = NULL;
768 }
769}
770
771void tlb_remove_table(struct mmu_gather *tlb, void *table)
772{
773 struct mmu_table_batch **batch = &tlb->batch;
774
775 if (*batch == NULL) {
776 *batch = (struct mmu_table_batch *)
777 __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
778 if (*batch == NULL) {
779 __tlb_flush_mm(tlb->mm);
780 tlb_remove_table_one(table);
781 return;
782 }
783 (*batch)->nr = 0;
784 }
785 (*batch)->tables[(*batch)->nr++] = table;
786 if ((*batch)->nr == MAX_TABLE_BATCH)
787 tlb_table_flush(tlb);
788}
789
790#ifdef CONFIG_TRANSPARENT_HUGEPAGE
791void thp_split_vma(struct vm_area_struct *vma)
792{
793 unsigned long addr;
794 struct page *page;
795
796 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
797 page = follow_page(vma, addr, FOLL_SPLIT);
798 }
799}
800
801void thp_split_mm(struct mm_struct *mm)
802{
803 struct vm_area_struct *vma = mm->mmap;
804
805 while (vma != NULL) {
806 thp_split_vma(vma);
807 vma->vm_flags &= ~VM_HUGEPAGE;
808 vma->vm_flags |= VM_NOHUGEPAGE;
809 vma = vma->vm_next;
810 }
811}
812#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
813 676
814/* 677/*
815 * switch on pgstes for its userspace process (for kvm) 678 * switch on pgstes for its userspace process (for kvm)
@@ -820,7 +683,7 @@ int s390_enable_sie(void)
820 struct mm_struct *mm, *old_mm; 683 struct mm_struct *mm, *old_mm;
821 684
822 /* Do we have switched amode? If no, we cannot do sie */ 685 /* Do we have switched amode? If no, we cannot do sie */
823 if (s390_user_mode == HOME_SPACE_MODE) 686 if (user_mode == HOME_SPACE_MODE)
824 return -EINVAL; 687 return -EINVAL;
825 688
826 /* Do we have pgstes? if yes, we are done */ 689 /* Do we have pgstes? if yes, we are done */
@@ -841,19 +704,11 @@ int s390_enable_sie(void)
841 704
842 /* we copy the mm and let dup_mm create the page tables with_pgstes */ 705 /* we copy the mm and let dup_mm create the page tables with_pgstes */
843 tsk->mm->context.alloc_pgste = 1; 706 tsk->mm->context.alloc_pgste = 1;
844 /* make sure that both mms have a correct rss state */
845 sync_mm_rss(tsk->mm);
846 mm = dup_mm(tsk); 707 mm = dup_mm(tsk);
847 tsk->mm->context.alloc_pgste = 0; 708 tsk->mm->context.alloc_pgste = 0;
848 if (!mm) 709 if (!mm)
849 return -ENOMEM; 710 return -ENOMEM;
850 711
851#ifdef CONFIG_TRANSPARENT_HUGEPAGE
852 /* split thp mappings and disable thp for future mappings */
853 thp_split_mm(mm);
854 mm->def_flags |= VM_NOHUGEPAGE;
855#endif
856
857 /* Now lets check again if something happened */ 712 /* Now lets check again if something happened */
858 task_lock(tsk); 713 task_lock(tsk);
859 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || 714 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
@@ -881,80 +736,18 @@ int s390_enable_sie(void)
881} 736}
882EXPORT_SYMBOL_GPL(s390_enable_sie); 737EXPORT_SYMBOL_GPL(s390_enable_sie);
883 738
884#ifdef CONFIG_TRANSPARENT_HUGEPAGE 739#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
885int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, 740bool kernel_page_present(struct page *page)
886 pmd_t *pmdp)
887{
888 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
889 /* No need to flush TLB
890 * On s390 reference bits are in storage key and never in TLB */
891 return pmdp_test_and_clear_young(vma, address, pmdp);
892}
893
894int pmdp_set_access_flags(struct vm_area_struct *vma,
895 unsigned long address, pmd_t *pmdp,
896 pmd_t entry, int dirty)
897{ 741{
898 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 742 unsigned long addr;
899 743 int cc;
900 if (pmd_same(*pmdp, entry))
901 return 0;
902 pmdp_invalidate(vma, address, pmdp);
903 set_pmd_at(vma->vm_mm, address, pmdp, entry);
904 return 1;
905}
906
907static void pmdp_splitting_flush_sync(void *arg)
908{
909 /* Simply deliver the interrupt */
910}
911
912void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
913 pmd_t *pmdp)
914{
915 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
916 if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
917 (unsigned long *) pmdp)) {
918 /* need to serialize against gup-fast (IRQ disabled) */
919 smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
920 }
921}
922
923void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
924{
925 struct list_head *lh = (struct list_head *) pgtable;
926
927 assert_spin_locked(&mm->page_table_lock);
928
929 /* FIFO */
930 if (!mm->pmd_huge_pte)
931 INIT_LIST_HEAD(lh);
932 else
933 list_add(lh, (struct list_head *) mm->pmd_huge_pte);
934 mm->pmd_huge_pte = pgtable;
935}
936 744
937pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) 745 addr = page_to_phys(page);
938{ 746 asm volatile(
939 struct list_head *lh; 747 " lra %1,0(%1)\n"
940 pgtable_t pgtable; 748 " ipm %0\n"
941 pte_t *ptep; 749 " srl %0,28"
942 750 : "=d" (cc), "+a" (addr) : : "cc");
943 assert_spin_locked(&mm->page_table_lock); 751 return cc == 0;
944
945 /* FIFO */
946 pgtable = mm->pmd_huge_pte;
947 lh = (struct list_head *) pgtable;
948 if (list_empty(lh))
949 mm->pmd_huge_pte = NULL;
950 else {
951 mm->pmd_huge_pte = (pgtable_t) lh->next;
952 list_del(lh);
953 }
954 ptep = (pte_t *) pgtable;
955 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
956 ptep++;
957 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
958 return pgtable;
959} 752}
960#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 753#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6ed1426d27c..781ff516956 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -1,4 +1,6 @@
1/* 1/*
2 * arch/s390/mm/vmem.c
3 *
2 * Copyright IBM Corp. 2006 4 * Copyright IBM Corp. 2006
3 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 5 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
4 */ 6 */
@@ -79,8 +81,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
79 */ 81 */
80static int vmem_add_mem(unsigned long start, unsigned long size, int ro) 82static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
81{ 83{
82 unsigned long end = start + size; 84 unsigned long address;
83 unsigned long address = start;
84 pgd_t *pg_dir; 85 pgd_t *pg_dir;
85 pud_t *pu_dir; 86 pud_t *pu_dir;
86 pmd_t *pm_dir; 87 pmd_t *pm_dir;
@@ -88,8 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
88 pte_t pte; 89 pte_t pte;
89 int ret = -ENOMEM; 90 int ret = -ENOMEM;
90 91
91 while (address < end) { 92 for (address = start; address < start + size; address += PAGE_SIZE) {
92 pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
93 pg_dir = pgd_offset_k(address); 93 pg_dir = pgd_offset_k(address);
94 if (pgd_none(*pg_dir)) { 94 if (pgd_none(*pg_dir)) {
95 pu_dir = vmem_pud_alloc(); 95 pu_dir = vmem_pud_alloc();
@@ -97,30 +97,25 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
97 goto out; 97 goto out;
98 pgd_populate(&init_mm, pg_dir, pu_dir); 98 pgd_populate(&init_mm, pg_dir, pu_dir);
99 } 99 }
100
100 pu_dir = pud_offset(pg_dir, address); 101 pu_dir = pud_offset(pg_dir, address);
101#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
102 if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
103 !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
104 pte_val(pte) |= _REGION3_ENTRY_LARGE;
105 pte_val(pte) |= _REGION_ENTRY_TYPE_R3;
106 pud_val(*pu_dir) = pte_val(pte);
107 address += PUD_SIZE;
108 continue;
109 }
110#endif
111 if (pud_none(*pu_dir)) { 102 if (pud_none(*pu_dir)) {
112 pm_dir = vmem_pmd_alloc(); 103 pm_dir = vmem_pmd_alloc();
113 if (!pm_dir) 104 if (!pm_dir)
114 goto out; 105 goto out;
115 pud_populate(&init_mm, pu_dir, pm_dir); 106 pud_populate(&init_mm, pu_dir, pm_dir);
116 } 107 }
108
109 pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
117 pm_dir = pmd_offset(pu_dir, address); 110 pm_dir = pmd_offset(pu_dir, address);
118#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) 111
119 if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && 112#ifdef __s390x__
120 !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { 113 if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
114 (address + HPAGE_SIZE <= start + size) &&
115 (address >= HPAGE_SIZE)) {
121 pte_val(pte) |= _SEGMENT_ENTRY_LARGE; 116 pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
122 pmd_val(*pm_dir) = pte_val(pte); 117 pmd_val(*pm_dir) = pte_val(pte);
123 address += PMD_SIZE; 118 address += HPAGE_SIZE - PAGE_SIZE;
124 continue; 119 continue;
125 } 120 }
126#endif 121#endif
@@ -133,11 +128,10 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
133 128
134 pt_dir = pte_offset_kernel(pm_dir, address); 129 pt_dir = pte_offset_kernel(pm_dir, address);
135 *pt_dir = pte; 130 *pt_dir = pte;
136 address += PAGE_SIZE;
137 } 131 }
138 ret = 0; 132 ret = 0;
139out: 133out:
140 flush_tlb_kernel_range(start, end); 134 flush_tlb_kernel_range(start, start + size);
141 return ret; 135 return ret;
142} 136}
143 137
@@ -147,8 +141,7 @@ out:
147 */ 141 */
148static void vmem_remove_range(unsigned long start, unsigned long size) 142static void vmem_remove_range(unsigned long start, unsigned long size)
149{ 143{
150 unsigned long end = start + size; 144 unsigned long address;
151 unsigned long address = start;
152 pgd_t *pg_dir; 145 pgd_t *pg_dir;
153 pud_t *pu_dir; 146 pud_t *pu_dir;
154 pmd_t *pm_dir; 147 pmd_t *pm_dir;
@@ -156,37 +149,25 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
156 pte_t pte; 149 pte_t pte;
157 150
158 pte_val(pte) = _PAGE_TYPE_EMPTY; 151 pte_val(pte) = _PAGE_TYPE_EMPTY;
159 while (address < end) { 152 for (address = start; address < start + size; address += PAGE_SIZE) {
160 pg_dir = pgd_offset_k(address); 153 pg_dir = pgd_offset_k(address);
161 if (pgd_none(*pg_dir)) {
162 address += PGDIR_SIZE;
163 continue;
164 }
165 pu_dir = pud_offset(pg_dir, address); 154 pu_dir = pud_offset(pg_dir, address);
166 if (pud_none(*pu_dir)) { 155 if (pud_none(*pu_dir))
167 address += PUD_SIZE;
168 continue; 156 continue;
169 }
170 if (pud_large(*pu_dir)) {
171 pud_clear(pu_dir);
172 address += PUD_SIZE;
173 continue;
174 }
175 pm_dir = pmd_offset(pu_dir, address); 157 pm_dir = pmd_offset(pu_dir, address);
176 if (pmd_none(*pm_dir)) { 158 if (pmd_none(*pm_dir))
177 address += PMD_SIZE;
178 continue; 159 continue;
179 } 160
180 if (pmd_large(*pm_dir)) { 161 if (pmd_huge(*pm_dir)) {
181 pmd_clear(pm_dir); 162 pmd_clear(pm_dir);
182 address += PMD_SIZE; 163 address += HPAGE_SIZE - PAGE_SIZE;
183 continue; 164 continue;
184 } 165 }
166
185 pt_dir = pte_offset_kernel(pm_dir, address); 167 pt_dir = pte_offset_kernel(pm_dir, address);
186 *pt_dir = pte; 168 *pt_dir = pte;
187 address += PAGE_SIZE;
188 } 169 }
189 flush_tlb_kernel_range(start, end); 170 flush_tlb_kernel_range(start, start + size);
190} 171}
191 172
192/* 173/*
@@ -205,7 +186,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
205 start_addr = (unsigned long) start; 186 start_addr = (unsigned long) start;
206 end_addr = (unsigned long) (start + nr); 187 end_addr = (unsigned long) (start + nr);
207 188
208 for (address = start_addr; address < end_addr;) { 189 for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
209 pg_dir = pgd_offset_k(address); 190 pg_dir = pgd_offset_k(address);
210 if (pgd_none(*pg_dir)) { 191 if (pgd_none(*pg_dir)) {
211 pu_dir = vmem_pud_alloc(); 192 pu_dir = vmem_pud_alloc();
@@ -224,33 +205,10 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
224 205
225 pm_dir = pmd_offset(pu_dir, address); 206 pm_dir = pmd_offset(pu_dir, address);
226 if (pmd_none(*pm_dir)) { 207 if (pmd_none(*pm_dir)) {
227#ifdef CONFIG_64BIT
228 /* Use 1MB frames for vmemmap if available. We always
229 * use large frames even if they are only partially
230 * used.
231 * Otherwise we would have also page tables since
232 * vmemmap_populate gets called for each section
233 * separately. */
234 if (MACHINE_HAS_EDAT1) {
235 void *new_page;
236
237 new_page = vmemmap_alloc_block(PMD_SIZE, node);
238 if (!new_page)
239 goto out;
240 pte = mk_pte_phys(__pa(new_page), PAGE_RW);
241 pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
242 pmd_val(*pm_dir) = pte_val(pte);
243 address = (address + PMD_SIZE) & PMD_MASK;
244 continue;
245 }
246#endif
247 pt_dir = vmem_pte_alloc(address); 208 pt_dir = vmem_pte_alloc(address);
248 if (!pt_dir) 209 if (!pt_dir)
249 goto out; 210 goto out;
250 pmd_populate(&init_mm, pm_dir, pt_dir); 211 pmd_populate(&init_mm, pm_dir, pt_dir);
251 } else if (pmd_large(*pm_dir)) {
252 address = (address + PMD_SIZE) & PMD_MASK;
253 continue;
254 } 212 }
255 213
256 pt_dir = pte_offset_kernel(pm_dir, address); 214 pt_dir = pte_offset_kernel(pm_dir, address);
@@ -263,7 +221,6 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
263 pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); 221 pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
264 *pt_dir = pte; 222 *pt_dir = pte;
265 } 223 }
266 address += PAGE_SIZE;
267 } 224 }
268 memset(start, 0, nr * sizeof(struct page)); 225 memset(start, 0, nr * sizeof(struct page));
269 ret = 0; 226 ret = 0;
@@ -375,12 +332,9 @@ void __init vmem_map_init(void)
375 unsigned long start, end; 332 unsigned long start, end;
376 int i; 333 int i;
377 334
378 ro_start = PFN_ALIGN((unsigned long)&_stext); 335 ro_start = ((unsigned long)&_stext) & PAGE_MASK;
379 ro_end = (unsigned long)&_eshared & PAGE_MASK; 336 ro_end = PFN_ALIGN((unsigned long)&_eshared);
380 for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { 337 for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
381 if (memory_chunk[i].type == CHUNK_CRASHK ||
382 memory_chunk[i].type == CHUNK_OLDMEM)
383 continue;
384 start = memory_chunk[i].addr; 338 start = memory_chunk[i].addr;
385 end = memory_chunk[i].addr + memory_chunk[i].size; 339 end = memory_chunk[i].addr + memory_chunk[i].size;
386 if (start >= ro_end || end <= ro_start) 340 if (start >= ro_end || end <= ro_start)
@@ -414,9 +368,6 @@ static int __init vmem_convert_memory_chunk(void)
414 for (i = 0; i < MEMORY_CHUNKS; i++) { 368 for (i = 0; i < MEMORY_CHUNKS; i++) {
415 if (!memory_chunk[i].size) 369 if (!memory_chunk[i].size)
416 continue; 370 continue;
417 if (memory_chunk[i].type == CHUNK_CRASHK ||
418 memory_chunk[i].type == CHUNK_OLDMEM)
419 continue;
420 seg = kzalloc(sizeof(*seg), GFP_KERNEL); 371 seg = kzalloc(sizeof(*seg), GFP_KERNEL);
421 if (!seg) 372 if (!seg)
422 panic("Out of memory...\n"); 373 panic("Out of memory...\n");