diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/s390/mm | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/Makefile | 11 | ||||
-rw-r--r-- | arch/s390/mm/cmm.c | 2 | ||||
-rw-r--r-- | arch/s390/mm/dump_pagetables.c | 231 | ||||
-rw-r--r-- | arch/s390/mm/extable.c | 81 | ||||
-rw-r--r-- | arch/s390/mm/extmem.c | 3 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 246 | ||||
-rw-r--r-- | arch/s390/mm/gup.c | 51 | ||||
-rw-r--r-- | arch/s390/mm/hugetlbpage.c | 4 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 86 | ||||
-rw-r--r-- | arch/s390/mm/maccess.c | 152 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 19 | ||||
-rw-r--r-- | arch/s390/mm/pageattr.c | 121 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 287 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 101 |
14 files changed, 271 insertions, 1124 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 640bea12303..d98fe9004a5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile | |||
@@ -2,9 +2,8 @@ | |||
2 | # Makefile for the linux s390-specific parts of the memory manager. | 2 | # Makefile for the linux s390-specific parts of the memory manager. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o | 5 | obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ |
6 | obj-y += page-states.o gup.o extable.o pageattr.o | 6 | page-states.o gup.o |
7 | 7 | obj-$(CONFIG_CMM) += cmm.o | |
8 | obj-$(CONFIG_CMM) += cmm.o | 8 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o |
9 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 9 | obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o |
10 | obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o | ||
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 479e9428291..1f1dba9dcf5 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Collaborative memory management interface. | 2 | * Collaborative memory management interface. |
3 | * | 3 | * |
4 | * Copyright IBM Corp 2003, 2010 | 4 | * Copyright IBM Corp 2003,2010 |
5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, | 5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, |
6 | * | 6 | * |
7 | */ | 7 | */ |
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c deleted file mode 100644 index 04e4892247d..00000000000 --- a/arch/s390/mm/dump_pagetables.c +++ /dev/null | |||
@@ -1,231 +0,0 @@ | |||
1 | #include <linux/seq_file.h> | ||
2 | #include <linux/debugfs.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/mm.h> | ||
5 | #include <asm/sections.h> | ||
6 | #include <asm/pgtable.h> | ||
7 | |||
8 | static unsigned long max_addr; | ||
9 | |||
10 | struct addr_marker { | ||
11 | unsigned long start_address; | ||
12 | const char *name; | ||
13 | }; | ||
14 | |||
15 | enum address_markers_idx { | ||
16 | IDENTITY_NR = 0, | ||
17 | KERNEL_START_NR, | ||
18 | KERNEL_END_NR, | ||
19 | VMEMMAP_NR, | ||
20 | VMALLOC_NR, | ||
21 | #ifdef CONFIG_64BIT | ||
22 | MODULES_NR, | ||
23 | #endif | ||
24 | }; | ||
25 | |||
26 | static struct addr_marker address_markers[] = { | ||
27 | [IDENTITY_NR] = {0, "Identity Mapping"}, | ||
28 | [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"}, | ||
29 | [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"}, | ||
30 | [VMEMMAP_NR] = {0, "vmemmap Area"}, | ||
31 | [VMALLOC_NR] = {0, "vmalloc Area"}, | ||
32 | #ifdef CONFIG_64BIT | ||
33 | [MODULES_NR] = {0, "Modules Area"}, | ||
34 | #endif | ||
35 | { -1, NULL } | ||
36 | }; | ||
37 | |||
38 | struct pg_state { | ||
39 | int level; | ||
40 | unsigned int current_prot; | ||
41 | unsigned long start_address; | ||
42 | unsigned long current_address; | ||
43 | const struct addr_marker *marker; | ||
44 | }; | ||
45 | |||
46 | static void print_prot(struct seq_file *m, unsigned int pr, int level) | ||
47 | { | ||
48 | static const char * const level_name[] = | ||
49 | { "ASCE", "PGD", "PUD", "PMD", "PTE" }; | ||
50 | |||
51 | seq_printf(m, "%s ", level_name[level]); | ||
52 | if (pr & _PAGE_INVALID) | ||
53 | seq_printf(m, "I\n"); | ||
54 | else | ||
55 | seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW"); | ||
56 | } | ||
57 | |||
58 | static void note_page(struct seq_file *m, struct pg_state *st, | ||
59 | unsigned int new_prot, int level) | ||
60 | { | ||
61 | static const char units[] = "KMGTPE"; | ||
62 | int width = sizeof(unsigned long) * 2; | ||
63 | const char *unit = units; | ||
64 | unsigned int prot, cur; | ||
65 | unsigned long delta; | ||
66 | |||
67 | /* | ||
68 | * If we have a "break" in the series, we need to flush the state | ||
69 | * that we have now. "break" is either changing perms, levels or | ||
70 | * address space marker. | ||
71 | */ | ||
72 | prot = new_prot; | ||
73 | cur = st->current_prot; | ||
74 | |||
75 | if (!st->level) { | ||
76 | /* First entry */ | ||
77 | st->current_prot = new_prot; | ||
78 | st->level = level; | ||
79 | st->marker = address_markers; | ||
80 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | ||
81 | } else if (prot != cur || level != st->level || | ||
82 | st->current_address >= st->marker[1].start_address) { | ||
83 | /* Print the actual finished series */ | ||
84 | seq_printf(m, "0x%0*lx-0x%0*lx", | ||
85 | width, st->start_address, | ||
86 | width, st->current_address); | ||
87 | delta = (st->current_address - st->start_address) >> 10; | ||
88 | while (!(delta & 0x3ff) && unit[1]) { | ||
89 | delta >>= 10; | ||
90 | unit++; | ||
91 | } | ||
92 | seq_printf(m, "%9lu%c ", delta, *unit); | ||
93 | print_prot(m, st->current_prot, st->level); | ||
94 | if (st->current_address >= st->marker[1].start_address) { | ||
95 | st->marker++; | ||
96 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | ||
97 | } | ||
98 | st->start_address = st->current_address; | ||
99 | st->current_prot = new_prot; | ||
100 | st->level = level; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * The actual page table walker functions. In order to keep the implementation | ||
106 | * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO | ||
107 | * flags to note_page() if a region, segment or page table entry is invalid or | ||
108 | * read-only. | ||
109 | * After all it's just a hint that the current level being walked contains an | ||
110 | * invalid or read-only entry. | ||
111 | */ | ||
112 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, | ||
113 | pmd_t *pmd, unsigned long addr) | ||
114 | { | ||
115 | unsigned int prot; | ||
116 | pte_t *pte; | ||
117 | int i; | ||
118 | |||
119 | for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { | ||
120 | st->current_address = addr; | ||
121 | pte = pte_offset_kernel(pmd, addr); | ||
122 | prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); | ||
123 | note_page(m, st, prot, 4); | ||
124 | addr += PAGE_SIZE; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, | ||
129 | pud_t *pud, unsigned long addr) | ||
130 | { | ||
131 | unsigned int prot; | ||
132 | pmd_t *pmd; | ||
133 | int i; | ||
134 | |||
135 | for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { | ||
136 | st->current_address = addr; | ||
137 | pmd = pmd_offset(pud, addr); | ||
138 | if (!pmd_none(*pmd)) { | ||
139 | if (pmd_large(*pmd)) { | ||
140 | prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO; | ||
141 | note_page(m, st, prot, 3); | ||
142 | } else | ||
143 | walk_pte_level(m, st, pmd, addr); | ||
144 | } else | ||
145 | note_page(m, st, _PAGE_INVALID, 3); | ||
146 | addr += PMD_SIZE; | ||
147 | } | ||
148 | } | ||
149 | |||
150 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, | ||
151 | pgd_t *pgd, unsigned long addr) | ||
152 | { | ||
153 | unsigned int prot; | ||
154 | pud_t *pud; | ||
155 | int i; | ||
156 | |||
157 | for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { | ||
158 | st->current_address = addr; | ||
159 | pud = pud_offset(pgd, addr); | ||
160 | if (!pud_none(*pud)) | ||
161 | if (pud_large(*pud)) { | ||
162 | prot = pud_val(*pud) & _PAGE_RO; | ||
163 | note_page(m, st, prot, 2); | ||
164 | } else | ||
165 | walk_pmd_level(m, st, pud, addr); | ||
166 | else | ||
167 | note_page(m, st, _PAGE_INVALID, 2); | ||
168 | addr += PUD_SIZE; | ||
169 | } | ||
170 | } | ||
171 | |||
172 | static void walk_pgd_level(struct seq_file *m) | ||
173 | { | ||
174 | unsigned long addr = 0; | ||
175 | struct pg_state st; | ||
176 | pgd_t *pgd; | ||
177 | int i; | ||
178 | |||
179 | memset(&st, 0, sizeof(st)); | ||
180 | for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) { | ||
181 | st.current_address = addr; | ||
182 | pgd = pgd_offset_k(addr); | ||
183 | if (!pgd_none(*pgd)) | ||
184 | walk_pud_level(m, &st, pgd, addr); | ||
185 | else | ||
186 | note_page(m, &st, _PAGE_INVALID, 1); | ||
187 | addr += PGDIR_SIZE; | ||
188 | } | ||
189 | /* Flush out the last page */ | ||
190 | st.current_address = max_addr; | ||
191 | note_page(m, &st, 0, 0); | ||
192 | } | ||
193 | |||
194 | static int ptdump_show(struct seq_file *m, void *v) | ||
195 | { | ||
196 | walk_pgd_level(m); | ||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | static int ptdump_open(struct inode *inode, struct file *filp) | ||
201 | { | ||
202 | return single_open(filp, ptdump_show, NULL); | ||
203 | } | ||
204 | |||
205 | static const struct file_operations ptdump_fops = { | ||
206 | .open = ptdump_open, | ||
207 | .read = seq_read, | ||
208 | .llseek = seq_lseek, | ||
209 | .release = single_release, | ||
210 | }; | ||
211 | |||
212 | static int pt_dump_init(void) | ||
213 | { | ||
214 | /* | ||
215 | * Figure out the maximum virtual address being accessible with the | ||
216 | * kernel ASCE. We need this to keep the page table walker functions | ||
217 | * from accessing non-existent entries. | ||
218 | */ | ||
219 | #ifdef CONFIG_32BIT | ||
220 | max_addr = 1UL << 31; | ||
221 | #else | ||
222 | max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; | ||
223 | max_addr = 1UL << (max_addr * 11 + 31); | ||
224 | address_markers[MODULES_NR].start_address = MODULES_VADDR; | ||
225 | #endif | ||
226 | address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; | ||
227 | address_markers[VMALLOC_NR].start_address = VMALLOC_START; | ||
228 | debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); | ||
229 | return 0; | ||
230 | } | ||
231 | device_initcall(pt_dump_init); | ||
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c deleted file mode 100644 index 4d1ee88864e..00000000000 --- a/arch/s390/mm/extable.c +++ /dev/null | |||
@@ -1,81 +0,0 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/sort.h> | ||
3 | #include <asm/uaccess.h> | ||
4 | |||
5 | /* | ||
6 | * Search one exception table for an entry corresponding to the | ||
7 | * given instruction address, and return the address of the entry, | ||
8 | * or NULL if none is found. | ||
9 | * We use a binary search, and thus we assume that the table is | ||
10 | * already sorted. | ||
11 | */ | ||
12 | const struct exception_table_entry * | ||
13 | search_extable(const struct exception_table_entry *first, | ||
14 | const struct exception_table_entry *last, | ||
15 | unsigned long value) | ||
16 | { | ||
17 | const struct exception_table_entry *mid; | ||
18 | unsigned long addr; | ||
19 | |||
20 | while (first <= last) { | ||
21 | mid = ((last - first) >> 1) + first; | ||
22 | addr = extable_insn(mid); | ||
23 | if (addr < value) | ||
24 | first = mid + 1; | ||
25 | else if (addr > value) | ||
26 | last = mid - 1; | ||
27 | else | ||
28 | return mid; | ||
29 | } | ||
30 | return NULL; | ||
31 | } | ||
32 | |||
33 | /* | ||
34 | * The exception table needs to be sorted so that the binary | ||
35 | * search that we use to find entries in it works properly. | ||
36 | * This is used both for the kernel exception table and for | ||
37 | * the exception tables of modules that get loaded. | ||
38 | * | ||
39 | */ | ||
40 | static int cmp_ex(const void *a, const void *b) | ||
41 | { | ||
42 | const struct exception_table_entry *x = a, *y = b; | ||
43 | |||
44 | /* This compare is only valid after normalization. */ | ||
45 | return x->insn - y->insn; | ||
46 | } | ||
47 | |||
48 | void sort_extable(struct exception_table_entry *start, | ||
49 | struct exception_table_entry *finish) | ||
50 | { | ||
51 | struct exception_table_entry *p; | ||
52 | int i; | ||
53 | |||
54 | /* Normalize entries to being relative to the start of the section */ | ||
55 | for (p = start, i = 0; p < finish; p++, i += 8) | ||
56 | p->insn += i; | ||
57 | sort(start, finish - start, sizeof(*start), cmp_ex, NULL); | ||
58 | /* Denormalize all entries */ | ||
59 | for (p = start, i = 0; p < finish; p++, i += 8) | ||
60 | p->insn -= i; | ||
61 | } | ||
62 | |||
63 | #ifdef CONFIG_MODULES | ||
64 | /* | ||
65 | * If the exception table is sorted, any referring to the module init | ||
66 | * will be at the beginning or the end. | ||
67 | */ | ||
68 | void trim_init_extable(struct module *m) | ||
69 | { | ||
70 | /* Trim the beginning */ | ||
71 | while (m->num_exentries && | ||
72 | within_module_init(extable_insn(&m->extable[0]), m)) { | ||
73 | m->extable++; | ||
74 | m->num_exentries--; | ||
75 | } | ||
76 | /* Trim the end */ | ||
77 | while (m->num_exentries && | ||
78 | within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m)) | ||
79 | m->num_exentries--; | ||
80 | } | ||
81 | #endif /* CONFIG_MODULES */ | ||
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 519bba716cc..075ddada491 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c | |||
@@ -1,9 +1,10 @@ | |||
1 | /* | 1 | /* |
2 | * File...........: arch/s390/mm/extmem.c | ||
2 | * Author(s)......: Carsten Otte <cotte@de.ibm.com> | 3 | * Author(s)......: Carsten Otte <cotte@de.ibm.com> |
3 | * Rob M van der Heij <rvdheij@nl.ibm.com> | 4 | * Rob M van der Heij <rvdheij@nl.ibm.com> |
4 | * Steven Shultz <shultzss@us.ibm.com> | 5 | * Steven Shultz <shultzss@us.ibm.com> |
5 | * Bugreports.to..: <Linux390@de.ibm.com> | 6 | * Bugreports.to..: <Linux390@de.ibm.com> |
6 | * Copyright IBM Corp. 2002, 2004 | 7 | * (C) IBM Corporation 2002-2004 |
7 | */ | 8 | */ |
8 | 9 | ||
9 | #define KMSG_COMPONENT "extmem" | 10 | #define KMSG_COMPONENT "extmem" |
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2fb9e63b8fc..9564fc779b2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -1,6 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * arch/s390/mm/fault.c | ||
3 | * | ||
2 | * S390 version | 4 | * S390 version |
3 | * Copyright IBM Corp. 1999 | 5 | * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation |
4 | * Author(s): Hartmut Penner (hp@de.ibm.com) | 6 | * Author(s): Hartmut Penner (hp@de.ibm.com) |
5 | * Ulrich Weigand (uweigand@de.ibm.com) | 7 | * Ulrich Weigand (uweigand@de.ibm.com) |
6 | * | 8 | * |
@@ -30,10 +32,11 @@ | |||
30 | #include <linux/uaccess.h> | 32 | #include <linux/uaccess.h> |
31 | #include <linux/hugetlb.h> | 33 | #include <linux/hugetlb.h> |
32 | #include <asm/asm-offsets.h> | 34 | #include <asm/asm-offsets.h> |
35 | #include <asm/system.h> | ||
33 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
34 | #include <asm/irq.h> | 37 | #include <asm/irq.h> |
35 | #include <asm/mmu_context.h> | 38 | #include <asm/mmu_context.h> |
36 | #include <asm/facility.h> | 39 | #include <asm/compat.h> |
37 | #include "../kernel/entry.h" | 40 | #include "../kernel/entry.h" |
38 | 41 | ||
39 | #ifndef CONFIG_64BIT | 42 | #ifndef CONFIG_64BIT |
@@ -49,19 +52,14 @@ | |||
49 | #define VM_FAULT_BADCONTEXT 0x010000 | 52 | #define VM_FAULT_BADCONTEXT 0x010000 |
50 | #define VM_FAULT_BADMAP 0x020000 | 53 | #define VM_FAULT_BADMAP 0x020000 |
51 | #define VM_FAULT_BADACCESS 0x040000 | 54 | #define VM_FAULT_BADACCESS 0x040000 |
52 | #define VM_FAULT_SIGNAL 0x080000 | ||
53 | 55 | ||
54 | static unsigned long store_indication __read_mostly; | 56 | static unsigned long store_indication; |
55 | 57 | ||
56 | #ifdef CONFIG_64BIT | 58 | void fault_init(void) |
57 | static int __init fault_init(void) | ||
58 | { | 59 | { |
59 | if (test_facility(75)) | 60 | if (test_facility(2) && test_facility(75)) |
60 | store_indication = 0xc00; | 61 | store_indication = 0xc00; |
61 | return 0; | ||
62 | } | 62 | } |
63 | early_initcall(fault_init); | ||
64 | #endif | ||
65 | 63 | ||
66 | static inline int notify_page_fault(struct pt_regs *regs) | 64 | static inline int notify_page_fault(struct pt_regs *regs) |
67 | { | 65 | { |
@@ -115,7 +113,7 @@ static inline int user_space_fault(unsigned long trans_exc_code) | |||
115 | if (trans_exc_code == 2) | 113 | if (trans_exc_code == 2) |
116 | /* Access via secondary space, set_fs setting decides */ | 114 | /* Access via secondary space, set_fs setting decides */ |
117 | return current->thread.mm_segment.ar4; | 115 | return current->thread.mm_segment.ar4; |
118 | if (s390_user_mode == HOME_SPACE_MODE) | 116 | if (user_mode == HOME_SPACE_MODE) |
119 | /* User space if the access has been done via home space. */ | 117 | /* User space if the access has been done via home space. */ |
120 | return trans_exc_code == 3; | 118 | return trans_exc_code == 3; |
121 | /* | 119 | /* |
@@ -127,7 +125,8 @@ static inline int user_space_fault(unsigned long trans_exc_code) | |||
127 | return trans_exc_code != 3; | 125 | return trans_exc_code != 3; |
128 | } | 126 | } |
129 | 127 | ||
130 | static inline void report_user_fault(struct pt_regs *regs, long signr) | 128 | static inline void report_user_fault(struct pt_regs *regs, long int_code, |
129 | int signr, unsigned long address) | ||
131 | { | 130 | { |
132 | if ((task_pid_nr(current) > 1) && !show_unhandled_signals) | 131 | if ((task_pid_nr(current) > 1) && !show_unhandled_signals) |
133 | return; | 132 | return; |
@@ -135,12 +134,10 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) | |||
135 | return; | 134 | return; |
136 | if (!printk_ratelimit()) | 135 | if (!printk_ratelimit()) |
137 | return; | 136 | return; |
138 | printk(KERN_ALERT "User process fault: interruption code 0x%X ", | 137 | printk("User process fault: interruption code 0x%lX ", int_code); |
139 | regs->int_code); | ||
140 | print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); | 138 | print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); |
141 | printk(KERN_CONT "\n"); | 139 | printk("\n"); |
142 | printk(KERN_ALERT "failing address: %lX\n", | 140 | printk("failing address: %lX\n", address); |
143 | regs->int_parm_long & __FAIL_ADDR_MASK); | ||
144 | show_regs(regs); | 141 | show_regs(regs); |
145 | } | 142 | } |
146 | 143 | ||
@@ -148,18 +145,24 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) | |||
148 | * Send SIGSEGV to task. This is an external routine | 145 | * Send SIGSEGV to task. This is an external routine |
149 | * to keep the stack usage of do_page_fault small. | 146 | * to keep the stack usage of do_page_fault small. |
150 | */ | 147 | */ |
151 | static noinline void do_sigsegv(struct pt_regs *regs, int si_code) | 148 | static noinline void do_sigsegv(struct pt_regs *regs, long int_code, |
149 | int si_code, unsigned long trans_exc_code) | ||
152 | { | 150 | { |
153 | struct siginfo si; | 151 | struct siginfo si; |
152 | unsigned long address; | ||
154 | 153 | ||
155 | report_user_fault(regs, SIGSEGV); | 154 | address = trans_exc_code & __FAIL_ADDR_MASK; |
155 | current->thread.prot_addr = address; | ||
156 | current->thread.trap_no = int_code; | ||
157 | report_user_fault(regs, int_code, SIGSEGV, address); | ||
156 | si.si_signo = SIGSEGV; | 158 | si.si_signo = SIGSEGV; |
157 | si.si_code = si_code; | 159 | si.si_code = si_code; |
158 | si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); | 160 | si.si_addr = (void __user *) address; |
159 | force_sig_info(SIGSEGV, &si, current); | 161 | force_sig_info(SIGSEGV, &si, current); |
160 | } | 162 | } |
161 | 163 | ||
162 | static noinline void do_no_context(struct pt_regs *regs) | 164 | static noinline void do_no_context(struct pt_regs *regs, long int_code, |
165 | unsigned long trans_exc_code) | ||
163 | { | 166 | { |
164 | const struct exception_table_entry *fixup; | 167 | const struct exception_table_entry *fixup; |
165 | unsigned long address; | 168 | unsigned long address; |
@@ -167,7 +170,7 @@ static noinline void do_no_context(struct pt_regs *regs) | |||
167 | /* Are we prepared to handle this kernel fault? */ | 170 | /* Are we prepared to handle this kernel fault? */ |
168 | fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); | 171 | fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); |
169 | if (fixup) { | 172 | if (fixup) { |
170 | regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; | 173 | regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; |
171 | return; | 174 | return; |
172 | } | 175 | } |
173 | 176 | ||
@@ -175,48 +178,55 @@ static noinline void do_no_context(struct pt_regs *regs) | |||
175 | * Oops. The kernel tried to access some bad page. We'll have to | 178 | * Oops. The kernel tried to access some bad page. We'll have to |
176 | * terminate things with extreme prejudice. | 179 | * terminate things with extreme prejudice. |
177 | */ | 180 | */ |
178 | address = regs->int_parm_long & __FAIL_ADDR_MASK; | 181 | address = trans_exc_code & __FAIL_ADDR_MASK; |
179 | if (!user_space_fault(regs->int_parm_long)) | 182 | if (!user_space_fault(trans_exc_code)) |
180 | printk(KERN_ALERT "Unable to handle kernel pointer dereference" | 183 | printk(KERN_ALERT "Unable to handle kernel pointer dereference" |
181 | " at virtual kernel address %p\n", (void *)address); | 184 | " at virtual kernel address %p\n", (void *)address); |
182 | else | 185 | else |
183 | printk(KERN_ALERT "Unable to handle kernel paging request" | 186 | printk(KERN_ALERT "Unable to handle kernel paging request" |
184 | " at virtual user address %p\n", (void *)address); | 187 | " at virtual user address %p\n", (void *)address); |
185 | 188 | ||
186 | die(regs, "Oops"); | 189 | die("Oops", regs, int_code); |
187 | do_exit(SIGKILL); | 190 | do_exit(SIGKILL); |
188 | } | 191 | } |
189 | 192 | ||
190 | static noinline void do_low_address(struct pt_regs *regs) | 193 | static noinline void do_low_address(struct pt_regs *regs, long int_code, |
194 | unsigned long trans_exc_code) | ||
191 | { | 195 | { |
192 | /* Low-address protection hit in kernel mode means | 196 | /* Low-address protection hit in kernel mode means |
193 | NULL pointer write access in kernel mode. */ | 197 | NULL pointer write access in kernel mode. */ |
194 | if (regs->psw.mask & PSW_MASK_PSTATE) { | 198 | if (regs->psw.mask & PSW_MASK_PSTATE) { |
195 | /* Low-address protection hit in user mode 'cannot happen'. */ | 199 | /* Low-address protection hit in user mode 'cannot happen'. */ |
196 | die (regs, "Low-address protection"); | 200 | die ("Low-address protection", regs, int_code); |
197 | do_exit(SIGKILL); | 201 | do_exit(SIGKILL); |
198 | } | 202 | } |
199 | 203 | ||
200 | do_no_context(regs); | 204 | do_no_context(regs, int_code, trans_exc_code); |
201 | } | 205 | } |
202 | 206 | ||
203 | static noinline void do_sigbus(struct pt_regs *regs) | 207 | static noinline void do_sigbus(struct pt_regs *regs, long int_code, |
208 | unsigned long trans_exc_code) | ||
204 | { | 209 | { |
205 | struct task_struct *tsk = current; | 210 | struct task_struct *tsk = current; |
211 | unsigned long address; | ||
206 | struct siginfo si; | 212 | struct siginfo si; |
207 | 213 | ||
208 | /* | 214 | /* |
209 | * Send a sigbus, regardless of whether we were in kernel | 215 | * Send a sigbus, regardless of whether we were in kernel |
210 | * or user mode. | 216 | * or user mode. |
211 | */ | 217 | */ |
218 | address = trans_exc_code & __FAIL_ADDR_MASK; | ||
219 | tsk->thread.prot_addr = address; | ||
220 | tsk->thread.trap_no = int_code; | ||
212 | si.si_signo = SIGBUS; | 221 | si.si_signo = SIGBUS; |
213 | si.si_errno = 0; | 222 | si.si_errno = 0; |
214 | si.si_code = BUS_ADRERR; | 223 | si.si_code = BUS_ADRERR; |
215 | si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); | 224 | si.si_addr = (void __user *) address; |
216 | force_sig_info(SIGBUS, &si, tsk); | 225 | force_sig_info(SIGBUS, &si, tsk); |
217 | } | 226 | } |
218 | 227 | ||
219 | static noinline void do_fault_error(struct pt_regs *regs, int fault) | 228 | static noinline void do_fault_error(struct pt_regs *regs, long int_code, |
229 | unsigned long trans_exc_code, int fault) | ||
220 | { | 230 | { |
221 | int si_code; | 231 | int si_code; |
222 | 232 | ||
@@ -224,32 +234,28 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) | |||
224 | case VM_FAULT_BADACCESS: | 234 | case VM_FAULT_BADACCESS: |
225 | case VM_FAULT_BADMAP: | 235 | case VM_FAULT_BADMAP: |
226 | /* Bad memory access. Check if it is kernel or user space. */ | 236 | /* Bad memory access. Check if it is kernel or user space. */ |
227 | if (user_mode(regs)) { | 237 | if (regs->psw.mask & PSW_MASK_PSTATE) { |
228 | /* User mode accesses just cause a SIGSEGV */ | 238 | /* User mode accesses just cause a SIGSEGV */ |
229 | si_code = (fault == VM_FAULT_BADMAP) ? | 239 | si_code = (fault == VM_FAULT_BADMAP) ? |
230 | SEGV_MAPERR : SEGV_ACCERR; | 240 | SEGV_MAPERR : SEGV_ACCERR; |
231 | do_sigsegv(regs, si_code); | 241 | do_sigsegv(regs, int_code, si_code, trans_exc_code); |
232 | return; | 242 | return; |
233 | } | 243 | } |
234 | case VM_FAULT_BADCONTEXT: | 244 | case VM_FAULT_BADCONTEXT: |
235 | do_no_context(regs); | 245 | do_no_context(regs, int_code, trans_exc_code); |
236 | break; | ||
237 | case VM_FAULT_SIGNAL: | ||
238 | if (!user_mode(regs)) | ||
239 | do_no_context(regs); | ||
240 | break; | 246 | break; |
241 | default: /* fault & VM_FAULT_ERROR */ | 247 | default: /* fault & VM_FAULT_ERROR */ |
242 | if (fault & VM_FAULT_OOM) { | 248 | if (fault & VM_FAULT_OOM) { |
243 | if (!user_mode(regs)) | 249 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) |
244 | do_no_context(regs); | 250 | do_no_context(regs, int_code, trans_exc_code); |
245 | else | 251 | else |
246 | pagefault_out_of_memory(); | 252 | pagefault_out_of_memory(); |
247 | } else if (fault & VM_FAULT_SIGBUS) { | 253 | } else if (fault & VM_FAULT_SIGBUS) { |
248 | /* Kernel mode? Handle exceptions or die */ | 254 | /* Kernel mode? Handle exceptions or die */ |
249 | if (!user_mode(regs)) | 255 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) |
250 | do_no_context(regs); | 256 | do_no_context(regs, int_code, trans_exc_code); |
251 | else | 257 | else |
252 | do_sigbus(regs); | 258 | do_sigbus(regs, int_code, trans_exc_code); |
253 | } else | 259 | } else |
254 | BUG(); | 260 | BUG(); |
255 | break; | 261 | break; |
@@ -267,28 +273,21 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) | |||
267 | * 11 Page translation -> Not present (nullification) | 273 | * 11 Page translation -> Not present (nullification) |
268 | * 3b Region third trans. -> Not present (nullification) | 274 | * 3b Region third trans. -> Not present (nullification) |
269 | */ | 275 | */ |
270 | static inline int do_exception(struct pt_regs *regs, int access) | 276 | static inline int do_exception(struct pt_regs *regs, int access, |
277 | unsigned long trans_exc_code) | ||
271 | { | 278 | { |
272 | struct task_struct *tsk; | 279 | struct task_struct *tsk; |
273 | struct mm_struct *mm; | 280 | struct mm_struct *mm; |
274 | struct vm_area_struct *vma; | 281 | struct vm_area_struct *vma; |
275 | unsigned long trans_exc_code; | ||
276 | unsigned long address; | 282 | unsigned long address; |
277 | unsigned int flags; | 283 | unsigned int flags; |
278 | int fault; | 284 | int fault; |
279 | 285 | ||
280 | tsk = current; | ||
281 | /* | ||
282 | * The instruction that caused the program check has | ||
283 | * been nullified. Don't signal single step via SIGTRAP. | ||
284 | */ | ||
285 | clear_tsk_thread_flag(tsk, TIF_PER_TRAP); | ||
286 | |||
287 | if (notify_page_fault(regs)) | 286 | if (notify_page_fault(regs)) |
288 | return 0; | 287 | return 0; |
289 | 288 | ||
289 | tsk = current; | ||
290 | mm = tsk->mm; | 290 | mm = tsk->mm; |
291 | trans_exc_code = regs->int_parm_long; | ||
292 | 291 | ||
293 | /* | 292 | /* |
294 | * Verify that the fault happened in user space, that | 293 | * Verify that the fault happened in user space, that |
@@ -301,14 +300,14 @@ static inline int do_exception(struct pt_regs *regs, int access) | |||
301 | 300 | ||
302 | address = trans_exc_code & __FAIL_ADDR_MASK; | 301 | address = trans_exc_code & __FAIL_ADDR_MASK; |
303 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | 302 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
304 | flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | 303 | flags = FAULT_FLAG_ALLOW_RETRY; |
305 | if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) | 304 | if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) |
306 | flags |= FAULT_FLAG_WRITE; | 305 | flags |= FAULT_FLAG_WRITE; |
307 | down_read(&mm->mmap_sem); | 306 | down_read(&mm->mmap_sem); |
308 | 307 | ||
309 | #ifdef CONFIG_PGSTE | 308 | #ifdef CONFIG_PGSTE |
310 | if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { | 309 | if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { |
311 | address = __gmap_fault(address, | 310 | address = gmap_fault(address, |
312 | (struct gmap *) S390_lowcore.gmap); | 311 | (struct gmap *) S390_lowcore.gmap); |
313 | if (address == -EFAULT) { | 312 | if (address == -EFAULT) { |
314 | fault = VM_FAULT_BADMAP; | 313 | fault = VM_FAULT_BADMAP; |
@@ -350,11 +349,6 @@ retry: | |||
350 | * the fault. | 349 | * the fault. |
351 | */ | 350 | */ |
352 | fault = handle_mm_fault(mm, vma, address, flags); | 351 | fault = handle_mm_fault(mm, vma, address, flags); |
353 | /* No reason to continue if interrupted by SIGKILL. */ | ||
354 | if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { | ||
355 | fault = VM_FAULT_SIGNAL; | ||
356 | goto out; | ||
357 | } | ||
358 | if (unlikely(fault & VM_FAULT_ERROR)) | 352 | if (unlikely(fault & VM_FAULT_ERROR)) |
359 | goto out_up; | 353 | goto out_up; |
360 | 354 | ||
@@ -377,11 +371,15 @@ retry: | |||
377 | /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk | 371 | /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk |
378 | * of starvation. */ | 372 | * of starvation. */ |
379 | flags &= ~FAULT_FLAG_ALLOW_RETRY; | 373 | flags &= ~FAULT_FLAG_ALLOW_RETRY; |
380 | flags |= FAULT_FLAG_TRIED; | ||
381 | down_read(&mm->mmap_sem); | 374 | down_read(&mm->mmap_sem); |
382 | goto retry; | 375 | goto retry; |
383 | } | 376 | } |
384 | } | 377 | } |
378 | /* | ||
379 | * The instruction that caused the program check will | ||
380 | * be repeated. Don't signal single step via SIGTRAP. | ||
381 | */ | ||
382 | clear_tsk_thread_flag(tsk, TIF_PER_TRAP); | ||
385 | fault = 0; | 383 | fault = 0; |
386 | out_up: | 384 | out_up: |
387 | up_read(&mm->mmap_sem); | 385 | up_read(&mm->mmap_sem); |
@@ -389,52 +387,45 @@ out: | |||
389 | return fault; | 387 | return fault; |
390 | } | 388 | } |
391 | 389 | ||
392 | void __kprobes do_protection_exception(struct pt_regs *regs) | 390 | void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, |
391 | unsigned long trans_exc_code) | ||
393 | { | 392 | { |
394 | unsigned long trans_exc_code; | ||
395 | int fault; | 393 | int fault; |
396 | 394 | ||
397 | trans_exc_code = regs->int_parm_long; | ||
398 | /* Protection exception is suppressing, decrement psw address. */ | 395 | /* Protection exception is suppressing, decrement psw address. */ |
399 | regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); | 396 | regs->psw.addr -= (pgm_int_code >> 16); |
400 | /* | 397 | /* |
401 | * Check for low-address protection. This needs to be treated | 398 | * Check for low-address protection. This needs to be treated |
402 | * as a special case because the translation exception code | 399 | * as a special case because the translation exception code |
403 | * field is not guaranteed to contain valid data in this case. | 400 | * field is not guaranteed to contain valid data in this case. |
404 | */ | 401 | */ |
405 | if (unlikely(!(trans_exc_code & 4))) { | 402 | if (unlikely(!(trans_exc_code & 4))) { |
406 | do_low_address(regs); | 403 | do_low_address(regs, pgm_int_code, trans_exc_code); |
407 | return; | 404 | return; |
408 | } | 405 | } |
409 | fault = do_exception(regs, VM_WRITE); | 406 | fault = do_exception(regs, VM_WRITE, trans_exc_code); |
410 | if (unlikely(fault)) | 407 | if (unlikely(fault)) |
411 | do_fault_error(regs, fault); | 408 | do_fault_error(regs, 4, trans_exc_code, fault); |
412 | } | 409 | } |
413 | 410 | ||
414 | void __kprobes do_dat_exception(struct pt_regs *regs) | 411 | void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, |
412 | unsigned long trans_exc_code) | ||
415 | { | 413 | { |
416 | int access, fault; | 414 | int access, fault; |
417 | 415 | ||
418 | access = VM_READ | VM_EXEC | VM_WRITE; | 416 | access = VM_READ | VM_EXEC | VM_WRITE; |
419 | fault = do_exception(regs, access); | 417 | fault = do_exception(regs, access, trans_exc_code); |
420 | if (unlikely(fault)) | 418 | if (unlikely(fault)) |
421 | do_fault_error(regs, fault); | 419 | do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); |
422 | } | 420 | } |
423 | 421 | ||
424 | #ifdef CONFIG_64BIT | 422 | #ifdef CONFIG_64BIT |
425 | void __kprobes do_asce_exception(struct pt_regs *regs) | 423 | void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, |
424 | unsigned long trans_exc_code) | ||
426 | { | 425 | { |
427 | struct mm_struct *mm = current->mm; | 426 | struct mm_struct *mm = current->mm; |
428 | struct vm_area_struct *vma; | 427 | struct vm_area_struct *vma; |
429 | unsigned long trans_exc_code; | ||
430 | 428 | ||
431 | /* | ||
432 | * The instruction that caused the program check has | ||
433 | * been nullified. Don't signal single step via SIGTRAP. | ||
434 | */ | ||
435 | clear_tsk_thread_flag(current, TIF_PER_TRAP); | ||
436 | |||
437 | trans_exc_code = regs->int_parm_long; | ||
438 | if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) | 429 | if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) |
439 | goto no_context; | 430 | goto no_context; |
440 | 431 | ||
@@ -448,13 +439,13 @@ void __kprobes do_asce_exception(struct pt_regs *regs) | |||
448 | } | 439 | } |
449 | 440 | ||
450 | /* User mode accesses just cause a SIGSEGV */ | 441 | /* User mode accesses just cause a SIGSEGV */ |
451 | if (user_mode(regs)) { | 442 | if (regs->psw.mask & PSW_MASK_PSTATE) { |
452 | do_sigsegv(regs, SEGV_MAPERR); | 443 | do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code); |
453 | return; | 444 | return; |
454 | } | 445 | } |
455 | 446 | ||
456 | no_context: | 447 | no_context: |
457 | do_no_context(regs); | 448 | do_no_context(regs, pgm_int_code, trans_exc_code); |
458 | } | 449 | } |
459 | #endif | 450 | #endif |
460 | 451 | ||
@@ -463,22 +454,20 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) | |||
463 | struct pt_regs regs; | 454 | struct pt_regs regs; |
464 | int access, fault; | 455 | int access, fault; |
465 | 456 | ||
466 | /* Emulate a uaccess fault from kernel mode. */ | 457 | regs.psw.mask = psw_kernel_bits; |
467 | regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; | ||
468 | if (!irqs_disabled()) | 458 | if (!irqs_disabled()) |
469 | regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; | 459 | regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; |
470 | regs.psw.addr = (unsigned long) __builtin_return_address(0); | 460 | regs.psw.addr = (unsigned long) __builtin_return_address(0); |
471 | regs.psw.addr |= PSW_ADDR_AMODE; | 461 | regs.psw.addr |= PSW_ADDR_AMODE; |
472 | regs.int_code = pgm_int_code; | 462 | uaddr &= PAGE_MASK; |
473 | regs.int_parm_long = (uaddr & PAGE_MASK) | 2; | ||
474 | access = write ? VM_WRITE : VM_READ; | 463 | access = write ? VM_WRITE : VM_READ; |
475 | fault = do_exception(&regs, access); | 464 | fault = do_exception(&regs, access, uaddr | 2); |
476 | /* | 465 | if (unlikely(fault)) { |
477 | * Since the fault happened in kernel mode while performing a uaccess | 466 | if (fault & VM_FAULT_OOM) |
478 | * all we need to do now is emulating a fixup in case "fault" is not | 467 | return -EFAULT; |
479 | * zero. | 468 | else if (fault & VM_FAULT_SIGBUS) |
480 | * For the calling uaccess functions this results always in -EFAULT. | 469 | do_sigbus(&regs, pgm_int_code, uaddr); |
481 | */ | 470 | } |
482 | return fault ? -EFAULT : 0; | 471 | return fault ? -EFAULT : 0; |
483 | } | 472 | } |
484 | 473 | ||
@@ -520,7 +509,7 @@ int pfault_init(void) | |||
520 | .reserved = __PF_RES_FIELD }; | 509 | .reserved = __PF_RES_FIELD }; |
521 | int rc; | 510 | int rc; |
522 | 511 | ||
523 | if (pfault_disable) | 512 | if (!MACHINE_IS_VM || pfault_disable) |
524 | return -1; | 513 | return -1; |
525 | asm volatile( | 514 | asm volatile( |
526 | " diag %1,%0,0x258\n" | 515 | " diag %1,%0,0x258\n" |
@@ -541,7 +530,7 @@ void pfault_fini(void) | |||
541 | .refversn = 2, | 530 | .refversn = 2, |
542 | }; | 531 | }; |
543 | 532 | ||
544 | if (pfault_disable) | 533 | if (!MACHINE_IS_VM || pfault_disable) |
545 | return; | 534 | return; |
546 | asm volatile( | 535 | asm volatile( |
547 | " diag %0,0,0x258\n" | 536 | " diag %0,0,0x258\n" |
@@ -553,7 +542,7 @@ void pfault_fini(void) | |||
553 | static DEFINE_SPINLOCK(pfault_lock); | 542 | static DEFINE_SPINLOCK(pfault_lock); |
554 | static LIST_HEAD(pfault_list); | 543 | static LIST_HEAD(pfault_list); |
555 | 544 | ||
556 | static void pfault_interrupt(struct ext_code ext_code, | 545 | static void pfault_interrupt(unsigned int ext_int_code, |
557 | unsigned int param32, unsigned long param64) | 546 | unsigned int param32, unsigned long param64) |
558 | { | 547 | { |
559 | struct task_struct *tsk; | 548 | struct task_struct *tsk; |
@@ -566,19 +555,23 @@ static void pfault_interrupt(struct ext_code ext_code, | |||
566 | * in the 'cpu address' field associated with the | 555 | * in the 'cpu address' field associated with the |
567 | * external interrupt. | 556 | * external interrupt. |
568 | */ | 557 | */ |
569 | subcode = ext_code.subcode; | 558 | subcode = ext_int_code >> 16; |
570 | if ((subcode & 0xff00) != __SUBCODE_MASK) | 559 | if ((subcode & 0xff00) != __SUBCODE_MASK) |
571 | return; | 560 | return; |
572 | inc_irq_stat(IRQEXT_PFL); | 561 | kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; |
573 | /* Get the token (= pid of the affected task). */ | 562 | if (subcode & 0x0080) { |
574 | pid = sizeof(void *) == 4 ? param32 : param64; | 563 | /* Get the token (= pid of the affected task). */ |
575 | rcu_read_lock(); | 564 | pid = sizeof(void *) == 4 ? param32 : param64; |
576 | tsk = find_task_by_pid_ns(pid, &init_pid_ns); | 565 | rcu_read_lock(); |
577 | if (tsk) | 566 | tsk = find_task_by_pid_ns(pid, &init_pid_ns); |
578 | get_task_struct(tsk); | 567 | if (tsk) |
579 | rcu_read_unlock(); | 568 | get_task_struct(tsk); |
580 | if (!tsk) | 569 | rcu_read_unlock(); |
581 | return; | 570 | if (!tsk) |
571 | return; | ||
572 | } else { | ||
573 | tsk = current; | ||
574 | } | ||
582 | spin_lock(&pfault_lock); | 575 | spin_lock(&pfault_lock); |
583 | if (subcode & 0x0080) { | 576 | if (subcode & 0x0080) { |
584 | /* signal bit is set -> a page has been swapped in by VM */ | 577 | /* signal bit is set -> a page has been swapped in by VM */ |
@@ -591,47 +584,30 @@ static void pfault_interrupt(struct ext_code ext_code, | |||
591 | tsk->thread.pfault_wait = 0; | 584 | tsk->thread.pfault_wait = 0; |
592 | list_del(&tsk->thread.list); | 585 | list_del(&tsk->thread.list); |
593 | wake_up_process(tsk); | 586 | wake_up_process(tsk); |
594 | put_task_struct(tsk); | ||
595 | } else { | 587 | } else { |
596 | /* Completion interrupt was faster than initial | 588 | /* Completion interrupt was faster than initial |
597 | * interrupt. Set pfault_wait to -1 so the initial | 589 | * interrupt. Set pfault_wait to -1 so the initial |
598 | * interrupt doesn't put the task to sleep. | 590 | * interrupt doesn't put the task to sleep. */ |
599 | * If the task is not running, ignore the completion | 591 | tsk->thread.pfault_wait = -1; |
600 | * interrupt since it must be a leftover of a PFAULT | ||
601 | * CANCEL operation which didn't remove all pending | ||
602 | * completion interrupts. */ | ||
603 | if (tsk->state == TASK_RUNNING) | ||
604 | tsk->thread.pfault_wait = -1; | ||
605 | } | 592 | } |
593 | put_task_struct(tsk); | ||
606 | } else { | 594 | } else { |
607 | /* signal bit not set -> a real page is missing. */ | 595 | /* signal bit not set -> a real page is missing. */ |
608 | if (WARN_ON_ONCE(tsk != current)) | 596 | if (tsk->thread.pfault_wait == -1) { |
609 | goto out; | ||
610 | if (tsk->thread.pfault_wait == 1) { | ||
611 | /* Already on the list with a reference: put to sleep */ | ||
612 | __set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
613 | set_tsk_need_resched(tsk); | ||
614 | } else if (tsk->thread.pfault_wait == -1) { | ||
615 | /* Completion interrupt was faster than the initial | 597 | /* Completion interrupt was faster than the initial |
616 | * interrupt (pfault_wait == -1). Set pfault_wait | 598 | * interrupt (pfault_wait == -1). Set pfault_wait |
617 | * back to zero and exit. */ | 599 | * back to zero and exit. */ |
618 | tsk->thread.pfault_wait = 0; | 600 | tsk->thread.pfault_wait = 0; |
619 | } else { | 601 | } else { |
620 | /* Initial interrupt arrived before completion | 602 | /* Initial interrupt arrived before completion |
621 | * interrupt. Let the task sleep. | 603 | * interrupt. Let the task sleep. */ |
622 | * An extra task reference is needed since a different | ||
623 | * cpu may set the task state to TASK_RUNNING again | ||
624 | * before the scheduler is reached. */ | ||
625 | get_task_struct(tsk); | ||
626 | tsk->thread.pfault_wait = 1; | 604 | tsk->thread.pfault_wait = 1; |
627 | list_add(&tsk->thread.list, &pfault_list); | 605 | list_add(&tsk->thread.list, &pfault_list); |
628 | __set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 606 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
629 | set_tsk_need_resched(tsk); | 607 | set_tsk_need_resched(tsk); |
630 | } | 608 | } |
631 | } | 609 | } |
632 | out: | ||
633 | spin_unlock(&pfault_lock); | 610 | spin_unlock(&pfault_lock); |
634 | put_task_struct(tsk); | ||
635 | } | 611 | } |
636 | 612 | ||
637 | static int __cpuinit pfault_cpu_notify(struct notifier_block *self, | 613 | static int __cpuinit pfault_cpu_notify(struct notifier_block *self, |
@@ -640,15 +616,15 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self, | |||
640 | struct thread_struct *thread, *next; | 616 | struct thread_struct *thread, *next; |
641 | struct task_struct *tsk; | 617 | struct task_struct *tsk; |
642 | 618 | ||
643 | switch (action & ~CPU_TASKS_FROZEN) { | 619 | switch (action) { |
644 | case CPU_DEAD: | 620 | case CPU_DEAD: |
621 | case CPU_DEAD_FROZEN: | ||
645 | spin_lock_irq(&pfault_lock); | 622 | spin_lock_irq(&pfault_lock); |
646 | list_for_each_entry_safe(thread, next, &pfault_list, list) { | 623 | list_for_each_entry_safe(thread, next, &pfault_list, list) { |
647 | thread->pfault_wait = 0; | 624 | thread->pfault_wait = 0; |
648 | list_del(&thread->list); | 625 | list_del(&thread->list); |
649 | tsk = container_of(thread, struct task_struct, thread); | 626 | tsk = container_of(thread, struct task_struct, thread); |
650 | wake_up_process(tsk); | 627 | wake_up_process(tsk); |
651 | put_task_struct(tsk); | ||
652 | } | 628 | } |
653 | spin_unlock_irq(&pfault_lock); | 629 | spin_unlock_irq(&pfault_lock); |
654 | break; | 630 | break; |
@@ -662,6 +638,8 @@ static int __init pfault_irq_init(void) | |||
662 | { | 638 | { |
663 | int rc; | 639 | int rc; |
664 | 640 | ||
641 | if (!MACHINE_IS_VM) | ||
642 | return 0; | ||
665 | rc = register_external_interrupt(0x2603, pfault_interrupt); | 643 | rc = register_external_interrupt(0x2603, pfault_interrupt); |
666 | if (rc) | 644 | if (rc) |
667 | goto out_extint; | 645 | goto out_extint; |
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 1f5315d1215..65cb06e2af4 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c | |||
@@ -115,18 +115,9 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, | |||
115 | pmd = *pmdp; | 115 | pmd = *pmdp; |
116 | barrier(); | 116 | barrier(); |
117 | next = pmd_addr_end(addr, end); | 117 | next = pmd_addr_end(addr, end); |
118 | /* | 118 | if (pmd_none(pmd)) |
119 | * The pmd_trans_splitting() check below explains why | ||
120 | * pmdp_splitting_flush() has to serialize with | ||
121 | * smp_call_function() against our disabled IRQs, to stop | ||
122 | * this gup-fast code from running while we set the | ||
123 | * splitting bit in the pmd. Returning zero will take | ||
124 | * the slow path that will call wait_split_huge_page() | ||
125 | * if the pmd is still in splitting state. | ||
126 | */ | ||
127 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
128 | return 0; | 119 | return 0; |
129 | if (unlikely(pmd_large(pmd))) { | 120 | if (unlikely(pmd_huge(pmd))) { |
130 | if (!gup_huge_pmd(pmdp, pmd, addr, next, | 121 | if (!gup_huge_pmd(pmdp, pmd, addr, next, |
131 | write, pages, nr)) | 122 | write, pages, nr)) |
132 | return 0; | 123 | return 0; |
@@ -163,42 +154,6 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, | |||
163 | return 1; | 154 | return 1; |
164 | } | 155 | } |
165 | 156 | ||
166 | /* | ||
167 | * Like get_user_pages_fast() except its IRQ-safe in that it won't fall | ||
168 | * back to the regular GUP. | ||
169 | */ | ||
170 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
171 | struct page **pages) | ||
172 | { | ||
173 | struct mm_struct *mm = current->mm; | ||
174 | unsigned long addr, len, end; | ||
175 | unsigned long next, flags; | ||
176 | pgd_t *pgdp, pgd; | ||
177 | int nr = 0; | ||
178 | |||
179 | start &= PAGE_MASK; | ||
180 | addr = start; | ||
181 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
182 | end = start + len; | ||
183 | if ((end < start) || (end > TASK_SIZE)) | ||
184 | return 0; | ||
185 | |||
186 | local_irq_save(flags); | ||
187 | pgdp = pgd_offset(mm, addr); | ||
188 | do { | ||
189 | pgd = *pgdp; | ||
190 | barrier(); | ||
191 | next = pgd_addr_end(addr, end); | ||
192 | if (pgd_none(pgd)) | ||
193 | break; | ||
194 | if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) | ||
195 | break; | ||
196 | } while (pgdp++, addr = next, addr != end); | ||
197 | local_irq_restore(flags); | ||
198 | |||
199 | return nr; | ||
200 | } | ||
201 | |||
202 | /** | 157 | /** |
203 | * get_user_pages_fast() - pin user pages in memory | 158 | * get_user_pages_fast() - pin user pages in memory |
204 | * @start: starting user address | 159 | * @start: starting user address |
@@ -228,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
228 | addr = start; | 183 | addr = start; |
229 | len = (unsigned long) nr_pages << PAGE_SHIFT; | 184 | len = (unsigned long) nr_pages << PAGE_SHIFT; |
230 | end = start + len; | 185 | end = start + len; |
231 | if ((end < start) || (end > TASK_SIZE)) | 186 | if (end < start) |
232 | goto slow_irqon; | 187 | goto slow_irqon; |
233 | 188 | ||
234 | /* | 189 | /* |
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 532525ec88c..597bb2d27c3 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * IBM System z Huge TLB Page Support for Kernel. | 2 | * IBM System z Huge TLB Page Support for Kernel. |
3 | * | 3 | * |
4 | * Copyright IBM Corp. 2007 | 4 | * Copyright 2007 IBM Corp. |
5 | * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> | 5 | * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> |
6 | */ | 6 | */ |
7 | 7 | ||
@@ -58,8 +58,6 @@ void arch_release_hugepage(struct page *page) | |||
58 | ptep = (pte_t *) page[1].index; | 58 | ptep = (pte_t *) page[1].index; |
59 | if (!ptep) | 59 | if (!ptep) |
60 | return; | 60 | return; |
61 | clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, | ||
62 | PTRS_PER_PTE * sizeof(pte_t)); | ||
63 | page_table_free(&init_mm, (unsigned long *) ptep); | 61 | page_table_free(&init_mm, (unsigned long *) ptep); |
64 | page[1].index = 0; | 62 | page[1].index = 0; |
65 | } | 63 | } |
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ae672f41c46..59b663109d9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c | |||
@@ -1,6 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * arch/s390/mm/init.c | ||
3 | * | ||
2 | * S390 version | 4 | * S390 version |
3 | * Copyright IBM Corp. 1999 | 5 | * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation |
4 | * Author(s): Hartmut Penner (hp@de.ibm.com) | 6 | * Author(s): Hartmut Penner (hp@de.ibm.com) |
5 | * | 7 | * |
6 | * Derived from "arch/i386/mm/init.c" | 8 | * Derived from "arch/i386/mm/init.c" |
@@ -24,9 +26,9 @@ | |||
24 | #include <linux/pfn.h> | 26 | #include <linux/pfn.h> |
25 | #include <linux/poison.h> | 27 | #include <linux/poison.h> |
26 | #include <linux/initrd.h> | 28 | #include <linux/initrd.h> |
27 | #include <linux/export.h> | ||
28 | #include <linux/gfp.h> | 29 | #include <linux/gfp.h> |
29 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
31 | #include <asm/system.h> | ||
30 | #include <asm/uaccess.h> | 32 | #include <asm/uaccess.h> |
31 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
32 | #include <asm/pgalloc.h> | 34 | #include <asm/pgalloc.h> |
@@ -35,14 +37,13 @@ | |||
35 | #include <asm/tlb.h> | 37 | #include <asm/tlb.h> |
36 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
37 | #include <asm/sections.h> | 39 | #include <asm/sections.h> |
38 | #include <asm/ctl_reg.h> | ||
39 | 40 | ||
40 | pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); | 41 | pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); |
41 | 42 | ||
42 | unsigned long empty_zero_page, zero_page_mask; | 43 | unsigned long empty_zero_page, zero_page_mask; |
43 | EXPORT_SYMBOL(empty_zero_page); | 44 | EXPORT_SYMBOL(empty_zero_page); |
44 | 45 | ||
45 | static unsigned long __init setup_zero_pages(void) | 46 | static unsigned long setup_zero_pages(void) |
46 | { | 47 | { |
47 | struct cpuid cpu_id; | 48 | struct cpuid cpu_id; |
48 | unsigned int order; | 49 | unsigned int order; |
@@ -91,22 +92,18 @@ static unsigned long __init setup_zero_pages(void) | |||
91 | void __init paging_init(void) | 92 | void __init paging_init(void) |
92 | { | 93 | { |
93 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 94 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
94 | unsigned long pgd_type, asce_bits; | 95 | unsigned long pgd_type; |
95 | 96 | ||
96 | init_mm.pgd = swapper_pg_dir; | 97 | init_mm.pgd = swapper_pg_dir; |
98 | S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; | ||
97 | #ifdef CONFIG_64BIT | 99 | #ifdef CONFIG_64BIT |
98 | if (VMALLOC_END > (1UL << 42)) { | 100 | /* A three level page table (4TB) is enough for the kernel space. */ |
99 | asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; | 101 | S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; |
100 | pgd_type = _REGION2_ENTRY_EMPTY; | 102 | pgd_type = _REGION3_ENTRY_EMPTY; |
101 | } else { | ||
102 | asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; | ||
103 | pgd_type = _REGION3_ENTRY_EMPTY; | ||
104 | } | ||
105 | #else | 103 | #else |
106 | asce_bits = _ASCE_TABLE_LENGTH; | 104 | S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; |
107 | pgd_type = _SEGMENT_ENTRY_EMPTY; | 105 | pgd_type = _SEGMENT_ENTRY_EMPTY; |
108 | #endif | 106 | #endif |
109 | S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; | ||
110 | clear_table((unsigned long *) init_mm.pgd, pgd_type, | 107 | clear_table((unsigned long *) init_mm.pgd, pgd_type, |
111 | sizeof(unsigned long)*2048); | 108 | sizeof(unsigned long)*2048); |
112 | vmem_map_init(); | 109 | vmem_map_init(); |
@@ -125,6 +122,7 @@ void __init paging_init(void) | |||
125 | max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); | 122 | max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); |
126 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 123 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
127 | free_area_init_nodes(max_zone_pfns); | 124 | free_area_init_nodes(max_zone_pfns); |
125 | fault_init(); | ||
128 | } | 126 | } |
129 | 127 | ||
130 | void __init mem_init(void) | 128 | void __init mem_init(void) |
@@ -158,6 +156,34 @@ void __init mem_init(void) | |||
158 | PFN_ALIGN((unsigned long)&_eshared) - 1); | 156 | PFN_ALIGN((unsigned long)&_eshared) - 1); |
159 | } | 157 | } |
160 | 158 | ||
159 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
160 | void kernel_map_pages(struct page *page, int numpages, int enable) | ||
161 | { | ||
162 | pgd_t *pgd; | ||
163 | pud_t *pud; | ||
164 | pmd_t *pmd; | ||
165 | pte_t *pte; | ||
166 | unsigned long address; | ||
167 | int i; | ||
168 | |||
169 | for (i = 0; i < numpages; i++) { | ||
170 | address = page_to_phys(page + i); | ||
171 | pgd = pgd_offset_k(address); | ||
172 | pud = pud_offset(pgd, address); | ||
173 | pmd = pmd_offset(pud, address); | ||
174 | pte = pte_offset_kernel(pmd, address); | ||
175 | if (!enable) { | ||
176 | __ptep_ipte(address, pte); | ||
177 | pte_val(*pte) = _PAGE_TYPE_EMPTY; | ||
178 | continue; | ||
179 | } | ||
180 | *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); | ||
181 | /* Flush cpu write queue. */ | ||
182 | mb(); | ||
183 | } | ||
184 | } | ||
185 | #endif | ||
186 | |||
161 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 187 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
162 | { | 188 | { |
163 | unsigned long addr = begin; | 189 | unsigned long addr = begin; |
@@ -183,7 +209,7 @@ void free_initmem(void) | |||
183 | } | 209 | } |
184 | 210 | ||
185 | #ifdef CONFIG_BLK_DEV_INITRD | 211 | #ifdef CONFIG_BLK_DEV_INITRD |
186 | void __init free_initrd_mem(unsigned long start, unsigned long end) | 212 | void free_initrd_mem(unsigned long start, unsigned long end) |
187 | { | 213 | { |
188 | free_init_pages("initrd memory", start, end); | 214 | free_init_pages("initrd memory", start, end); |
189 | } | 215 | } |
@@ -192,38 +218,16 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) | |||
192 | #ifdef CONFIG_MEMORY_HOTPLUG | 218 | #ifdef CONFIG_MEMORY_HOTPLUG |
193 | int arch_add_memory(int nid, u64 start, u64 size) | 219 | int arch_add_memory(int nid, u64 start, u64 size) |
194 | { | 220 | { |
195 | unsigned long zone_start_pfn, zone_end_pfn, nr_pages; | 221 | struct pglist_data *pgdat; |
196 | unsigned long start_pfn = PFN_DOWN(start); | ||
197 | unsigned long size_pages = PFN_DOWN(size); | ||
198 | struct zone *zone; | 222 | struct zone *zone; |
199 | int rc; | 223 | int rc; |
200 | 224 | ||
225 | pgdat = NODE_DATA(nid); | ||
226 | zone = pgdat->node_zones + ZONE_MOVABLE; | ||
201 | rc = vmem_add_mapping(start, size); | 227 | rc = vmem_add_mapping(start, size); |
202 | if (rc) | 228 | if (rc) |
203 | return rc; | 229 | return rc; |
204 | for_each_zone(zone) { | 230 | rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size)); |
205 | if (zone_idx(zone) != ZONE_MOVABLE) { | ||
206 | /* Add range within existing zone limits */ | ||
207 | zone_start_pfn = zone->zone_start_pfn; | ||
208 | zone_end_pfn = zone->zone_start_pfn + | ||
209 | zone->spanned_pages; | ||
210 | } else { | ||
211 | /* Add remaining range to ZONE_MOVABLE */ | ||
212 | zone_start_pfn = start_pfn; | ||
213 | zone_end_pfn = start_pfn + size_pages; | ||
214 | } | ||
215 | if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn) | ||
216 | continue; | ||
217 | nr_pages = (start_pfn + size_pages > zone_end_pfn) ? | ||
218 | zone_end_pfn - start_pfn : size_pages; | ||
219 | rc = __add_pages(nid, zone, start_pfn, nr_pages); | ||
220 | if (rc) | ||
221 | break; | ||
222 | start_pfn += nr_pages; | ||
223 | size_pages -= nr_pages; | ||
224 | if (!size_pages) | ||
225 | break; | ||
226 | } | ||
227 | if (rc) | 231 | if (rc) |
228 | vmem_remove_mapping(start, size); | 232 | vmem_remove_mapping(start, size); |
229 | return rc; | 233 | return rc; |
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 921fa541dc0..5dbbaa6e594 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c | |||
@@ -11,9 +11,7 @@ | |||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
14 | #include <linux/gfp.h> | 14 | #include <asm/system.h> |
15 | #include <linux/cpu.h> | ||
16 | #include <asm/ctl_reg.h> | ||
17 | 15 | ||
18 | /* | 16 | /* |
19 | * This function writes to kernel memory bypassing DAT and possible | 17 | * This function writes to kernel memory bypassing DAT and possible |
@@ -62,14 +60,18 @@ long probe_kernel_write(void *dst, const void *src, size_t size) | |||
62 | return copied < 0 ? -EFAULT : 0; | 60 | return copied < 0 ? -EFAULT : 0; |
63 | } | 61 | } |
64 | 62 | ||
65 | static int __memcpy_real(void *dest, void *src, size_t count) | 63 | int memcpy_real(void *dest, void *src, size_t count) |
66 | { | 64 | { |
67 | register unsigned long _dest asm("2") = (unsigned long) dest; | 65 | register unsigned long _dest asm("2") = (unsigned long) dest; |
68 | register unsigned long _len1 asm("3") = (unsigned long) count; | 66 | register unsigned long _len1 asm("3") = (unsigned long) count; |
69 | register unsigned long _src asm("4") = (unsigned long) src; | 67 | register unsigned long _src asm("4") = (unsigned long) src; |
70 | register unsigned long _len2 asm("5") = (unsigned long) count; | 68 | register unsigned long _len2 asm("5") = (unsigned long) count; |
69 | unsigned long flags; | ||
71 | int rc = -EFAULT; | 70 | int rc = -EFAULT; |
72 | 71 | ||
72 | if (!count) | ||
73 | return 0; | ||
74 | flags = __arch_local_irq_stnsm(0xf8UL); | ||
73 | asm volatile ( | 75 | asm volatile ( |
74 | "0: mvcle %1,%2,0x0\n" | 76 | "0: mvcle %1,%2,0x0\n" |
75 | "1: jo 0b\n" | 77 | "1: jo 0b\n" |
@@ -80,150 +82,22 @@ static int __memcpy_real(void *dest, void *src, size_t count) | |||
80 | "+d" (_len2), "=m" (*((long *) dest)) | 82 | "+d" (_len2), "=m" (*((long *) dest)) |
81 | : "m" (*((long *) src)) | 83 | : "m" (*((long *) src)) |
82 | : "cc", "memory"); | 84 | : "cc", "memory"); |
85 | arch_local_irq_restore(flags); | ||
83 | return rc; | 86 | return rc; |
84 | } | 87 | } |
85 | 88 | ||
86 | /* | 89 | /* |
87 | * Copy memory in real mode (kernel to kernel) | 90 | * Copy memory to absolute zero |
88 | */ | ||
89 | int memcpy_real(void *dest, void *src, size_t count) | ||
90 | { | ||
91 | unsigned long flags; | ||
92 | int rc; | ||
93 | |||
94 | if (!count) | ||
95 | return 0; | ||
96 | local_irq_save(flags); | ||
97 | __arch_local_irq_stnsm(0xfbUL); | ||
98 | rc = __memcpy_real(dest, src, count); | ||
99 | local_irq_restore(flags); | ||
100 | return rc; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Copy memory in absolute mode (kernel to kernel) | ||
105 | */ | 91 | */ |
106 | void memcpy_absolute(void *dest, void *src, size_t count) | 92 | void copy_to_absolute_zero(void *dest, void *src, size_t count) |
107 | { | 93 | { |
108 | unsigned long cr0, flags, prefix; | 94 | unsigned long cr0; |
109 | 95 | ||
110 | flags = arch_local_irq_save(); | 96 | BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore)); |
97 | preempt_disable(); | ||
111 | __ctl_store(cr0, 0, 0); | 98 | __ctl_store(cr0, 0, 0); |
112 | __ctl_clear_bit(0, 28); /* disable lowcore protection */ | 99 | __ctl_clear_bit(0, 28); /* disable lowcore protection */ |
113 | prefix = store_prefix(); | 100 | memcpy_real(dest + store_prefix(), src, count); |
114 | if (prefix) { | ||
115 | local_mcck_disable(); | ||
116 | set_prefix(0); | ||
117 | memcpy(dest, src, count); | ||
118 | set_prefix(prefix); | ||
119 | local_mcck_enable(); | ||
120 | } else { | ||
121 | memcpy(dest, src, count); | ||
122 | } | ||
123 | __ctl_load(cr0, 0, 0); | 101 | __ctl_load(cr0, 0, 0); |
124 | arch_local_irq_restore(flags); | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Copy memory from kernel (real) to user (virtual) | ||
129 | */ | ||
130 | int copy_to_user_real(void __user *dest, void *src, size_t count) | ||
131 | { | ||
132 | int offs = 0, size, rc; | ||
133 | char *buf; | ||
134 | |||
135 | buf = (char *) __get_free_page(GFP_KERNEL); | ||
136 | if (!buf) | ||
137 | return -ENOMEM; | ||
138 | rc = -EFAULT; | ||
139 | while (offs < count) { | ||
140 | size = min(PAGE_SIZE, count - offs); | ||
141 | if (memcpy_real(buf, src + offs, size)) | ||
142 | goto out; | ||
143 | if (copy_to_user(dest + offs, buf, size)) | ||
144 | goto out; | ||
145 | offs += size; | ||
146 | } | ||
147 | rc = 0; | ||
148 | out: | ||
149 | free_page((unsigned long) buf); | ||
150 | return rc; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Copy memory from user (virtual) to kernel (real) | ||
155 | */ | ||
156 | int copy_from_user_real(void *dest, void __user *src, size_t count) | ||
157 | { | ||
158 | int offs = 0, size, rc; | ||
159 | char *buf; | ||
160 | |||
161 | buf = (char *) __get_free_page(GFP_KERNEL); | ||
162 | if (!buf) | ||
163 | return -ENOMEM; | ||
164 | rc = -EFAULT; | ||
165 | while (offs < count) { | ||
166 | size = min(PAGE_SIZE, count - offs); | ||
167 | if (copy_from_user(buf, src + offs, size)) | ||
168 | goto out; | ||
169 | if (memcpy_real(dest + offs, buf, size)) | ||
170 | goto out; | ||
171 | offs += size; | ||
172 | } | ||
173 | rc = 0; | ||
174 | out: | ||
175 | free_page((unsigned long) buf); | ||
176 | return rc; | ||
177 | } | ||
178 | |||
179 | /* | ||
180 | * Check if physical address is within prefix or zero page | ||
181 | */ | ||
182 | static int is_swapped(unsigned long addr) | ||
183 | { | ||
184 | unsigned long lc; | ||
185 | int cpu; | ||
186 | |||
187 | if (addr < sizeof(struct _lowcore)) | ||
188 | return 1; | ||
189 | for_each_online_cpu(cpu) { | ||
190 | lc = (unsigned long) lowcore_ptr[cpu]; | ||
191 | if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc) | ||
192 | continue; | ||
193 | return 1; | ||
194 | } | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * Convert a physical pointer for /dev/mem access | ||
200 | * | ||
201 | * For swapped prefix pages a new buffer is returned that contains a copy of | ||
202 | * the absolute memory. The buffer size is maximum one page large. | ||
203 | */ | ||
204 | void *xlate_dev_mem_ptr(unsigned long addr) | ||
205 | { | ||
206 | void *bounce = (void *) addr; | ||
207 | unsigned long size; | ||
208 | |||
209 | get_online_cpus(); | ||
210 | preempt_disable(); | ||
211 | if (is_swapped(addr)) { | ||
212 | size = PAGE_SIZE - (addr & ~PAGE_MASK); | ||
213 | bounce = (void *) __get_free_page(GFP_ATOMIC); | ||
214 | if (bounce) | ||
215 | memcpy_absolute(bounce, (void *) addr, size); | ||
216 | } | ||
217 | preempt_enable(); | 102 | preempt_enable(); |
218 | put_online_cpus(); | ||
219 | return bounce; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Free converted buffer for /dev/mem access (if necessary) | ||
224 | */ | ||
225 | void unxlate_dev_mem_ptr(unsigned long addr, void *buf) | ||
226 | { | ||
227 | if ((void *) addr != buf) | ||
228 | free_page((unsigned long) buf); | ||
229 | } | 103 | } |
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c59a5efa58b..c9a9f7f1818 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c | |||
@@ -1,4 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * linux/arch/s390/mm/mmap.c | ||
3 | * | ||
2 | * flexible mmap layout support | 4 | * flexible mmap layout support |
3 | * | 5 | * |
4 | * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. | 6 | * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. |
@@ -24,11 +26,10 @@ | |||
24 | 26 | ||
25 | #include <linux/personality.h> | 27 | #include <linux/personality.h> |
26 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
27 | #include <linux/mman.h> | ||
28 | #include <linux/module.h> | 29 | #include <linux/module.h> |
29 | #include <linux/random.h> | 30 | #include <linux/random.h> |
30 | #include <linux/compat.h> | ||
31 | #include <asm/pgalloc.h> | 31 | #include <asm/pgalloc.h> |
32 | #include <asm/compat.h> | ||
32 | 33 | ||
33 | static unsigned long stack_maxrandom_size(void) | 34 | static unsigned long stack_maxrandom_size(void) |
34 | { | 35 | { |
@@ -98,20 +99,15 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
98 | mm->unmap_area = arch_unmap_area_topdown; | 99 | mm->unmap_area = arch_unmap_area_topdown; |
99 | } | 100 | } |
100 | } | 101 | } |
102 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); | ||
101 | 103 | ||
102 | #else | 104 | #else |
103 | 105 | ||
104 | int s390_mmap_check(unsigned long addr, unsigned long len) | 106 | int s390_mmap_check(unsigned long addr, unsigned long len) |
105 | { | 107 | { |
106 | int rc; | ||
107 | |||
108 | if (!is_compat_task() && | 108 | if (!is_compat_task() && |
109 | len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) { | 109 | len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) |
110 | rc = crst_table_upgrade(current->mm, 1UL << 53); | 110 | return crst_table_upgrade(current->mm, 1UL << 53); |
111 | if (rc) | ||
112 | return rc; | ||
113 | update_mm(current->mm, current); | ||
114 | } | ||
115 | return 0; | 111 | return 0; |
116 | } | 112 | } |
117 | 113 | ||
@@ -131,7 +127,6 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr, | |||
131 | rc = crst_table_upgrade(mm, 1UL << 53); | 127 | rc = crst_table_upgrade(mm, 1UL << 53); |
132 | if (rc) | 128 | if (rc) |
133 | return (unsigned long) rc; | 129 | return (unsigned long) rc; |
134 | update_mm(mm, current); | ||
135 | area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); | 130 | area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); |
136 | } | 131 | } |
137 | return area; | 132 | return area; |
@@ -154,7 +149,6 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, | |||
154 | rc = crst_table_upgrade(mm, 1UL << 53); | 149 | rc = crst_table_upgrade(mm, 1UL << 53); |
155 | if (rc) | 150 | if (rc) |
156 | return (unsigned long) rc; | 151 | return (unsigned long) rc; |
157 | update_mm(mm, current); | ||
158 | area = arch_get_unmapped_area_topdown(filp, addr, len, | 152 | area = arch_get_unmapped_area_topdown(filp, addr, len, |
159 | pgoff, flags); | 153 | pgoff, flags); |
160 | } | 154 | } |
@@ -180,5 +174,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
180 | mm->unmap_area = arch_unmap_area_topdown; | 174 | mm->unmap_area = arch_unmap_area_topdown; |
181 | } | 175 | } |
182 | } | 176 | } |
177 | EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); | ||
183 | 178 | ||
184 | #endif | 179 | #endif |
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 29ccee3651f..d013ed39743 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c | |||
@@ -2,79 +2,30 @@ | |||
2 | * Copyright IBM Corp. 2011 | 2 | * Copyright IBM Corp. 2011 |
3 | * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> | 3 | * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> |
4 | */ | 4 | */ |
5 | #include <linux/hugetlb.h> | ||
6 | #include <linux/module.h> | 5 | #include <linux/module.h> |
7 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
8 | #include <asm/cacheflush.h> | 7 | #include <linux/hugetlb.h> |
9 | #include <asm/pgtable.h> | 8 | #include <asm/pgtable.h> |
10 | #include <asm/page.h> | ||
11 | |||
12 | void storage_key_init_range(unsigned long start, unsigned long end) | ||
13 | { | ||
14 | unsigned long boundary, function, size; | ||
15 | |||
16 | while (start < end) { | ||
17 | if (MACHINE_HAS_EDAT2) { | ||
18 | /* set storage keys for a 2GB frame */ | ||
19 | function = 0x22000 | PAGE_DEFAULT_KEY; | ||
20 | size = 1UL << 31; | ||
21 | boundary = (start + size) & ~(size - 1); | ||
22 | if (boundary <= end) { | ||
23 | do { | ||
24 | start = pfmf(function, start); | ||
25 | } while (start < boundary); | ||
26 | continue; | ||
27 | } | ||
28 | } | ||
29 | if (MACHINE_HAS_EDAT1) { | ||
30 | /* set storage keys for a 1MB frame */ | ||
31 | function = 0x21000 | PAGE_DEFAULT_KEY; | ||
32 | size = 1UL << 20; | ||
33 | boundary = (start + size) & ~(size - 1); | ||
34 | if (boundary <= end) { | ||
35 | do { | ||
36 | start = pfmf(function, start); | ||
37 | } while (start < boundary); | ||
38 | continue; | ||
39 | } | ||
40 | } | ||
41 | page_set_storage_key(start, PAGE_DEFAULT_KEY, 0); | ||
42 | start += PAGE_SIZE; | ||
43 | } | ||
44 | } | ||
45 | |||
46 | static pte_t *walk_page_table(unsigned long addr) | ||
47 | { | ||
48 | pgd_t *pgdp; | ||
49 | pud_t *pudp; | ||
50 | pmd_t *pmdp; | ||
51 | pte_t *ptep; | ||
52 | |||
53 | pgdp = pgd_offset_k(addr); | ||
54 | if (pgd_none(*pgdp)) | ||
55 | return NULL; | ||
56 | pudp = pud_offset(pgdp, addr); | ||
57 | if (pud_none(*pudp) || pud_large(*pudp)) | ||
58 | return NULL; | ||
59 | pmdp = pmd_offset(pudp, addr); | ||
60 | if (pmd_none(*pmdp) || pmd_large(*pmdp)) | ||
61 | return NULL; | ||
62 | ptep = pte_offset_kernel(pmdp, addr); | ||
63 | if (pte_none(*ptep)) | ||
64 | return NULL; | ||
65 | return ptep; | ||
66 | } | ||
67 | 9 | ||
68 | static void change_page_attr(unsigned long addr, int numpages, | 10 | static void change_page_attr(unsigned long addr, int numpages, |
69 | pte_t (*set) (pte_t)) | 11 | pte_t (*set) (pte_t)) |
70 | { | 12 | { |
71 | pte_t *ptep, pte; | 13 | pte_t *ptep, pte; |
14 | pmd_t *pmdp; | ||
15 | pud_t *pudp; | ||
16 | pgd_t *pgdp; | ||
72 | int i; | 17 | int i; |
73 | 18 | ||
74 | for (i = 0; i < numpages; i++) { | 19 | for (i = 0; i < numpages; i++) { |
75 | ptep = walk_page_table(addr); | 20 | pgdp = pgd_offset(&init_mm, addr); |
76 | if (WARN_ON_ONCE(!ptep)) | 21 | pudp = pud_offset(pgdp, addr); |
77 | break; | 22 | pmdp = pmd_offset(pudp, addr); |
23 | if (pmd_huge(*pmdp)) { | ||
24 | WARN_ON_ONCE(1); | ||
25 | continue; | ||
26 | } | ||
27 | ptep = pte_offset_kernel(pmdp, addr); | ||
28 | |||
78 | pte = *ptep; | 29 | pte = *ptep; |
79 | pte = set(pte); | 30 | pte = set(pte); |
80 | __ptep_ipte(addr, ptep); | 31 | __ptep_ipte(addr, ptep); |
@@ -88,63 +39,23 @@ int set_memory_ro(unsigned long addr, int numpages) | |||
88 | change_page_attr(addr, numpages, pte_wrprotect); | 39 | change_page_attr(addr, numpages, pte_wrprotect); |
89 | return 0; | 40 | return 0; |
90 | } | 41 | } |
42 | EXPORT_SYMBOL_GPL(set_memory_ro); | ||
91 | 43 | ||
92 | int set_memory_rw(unsigned long addr, int numpages) | 44 | int set_memory_rw(unsigned long addr, int numpages) |
93 | { | 45 | { |
94 | change_page_attr(addr, numpages, pte_mkwrite); | 46 | change_page_attr(addr, numpages, pte_mkwrite); |
95 | return 0; | 47 | return 0; |
96 | } | 48 | } |
49 | EXPORT_SYMBOL_GPL(set_memory_rw); | ||
97 | 50 | ||
98 | /* not possible */ | 51 | /* not possible */ |
99 | int set_memory_nx(unsigned long addr, int numpages) | 52 | int set_memory_nx(unsigned long addr, int numpages) |
100 | { | 53 | { |
101 | return 0; | 54 | return 0; |
102 | } | 55 | } |
56 | EXPORT_SYMBOL_GPL(set_memory_nx); | ||
103 | 57 | ||
104 | int set_memory_x(unsigned long addr, int numpages) | 58 | int set_memory_x(unsigned long addr, int numpages) |
105 | { | 59 | { |
106 | return 0; | 60 | return 0; |
107 | } | 61 | } |
108 | |||
109 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
110 | void kernel_map_pages(struct page *page, int numpages, int enable) | ||
111 | { | ||
112 | unsigned long address; | ||
113 | pgd_t *pgd; | ||
114 | pud_t *pud; | ||
115 | pmd_t *pmd; | ||
116 | pte_t *pte; | ||
117 | int i; | ||
118 | |||
119 | for (i = 0; i < numpages; i++) { | ||
120 | address = page_to_phys(page + i); | ||
121 | pgd = pgd_offset_k(address); | ||
122 | pud = pud_offset(pgd, address); | ||
123 | pmd = pmd_offset(pud, address); | ||
124 | pte = pte_offset_kernel(pmd, address); | ||
125 | if (!enable) { | ||
126 | __ptep_ipte(address, pte); | ||
127 | pte_val(*pte) = _PAGE_TYPE_EMPTY; | ||
128 | continue; | ||
129 | } | ||
130 | *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); | ||
131 | } | ||
132 | } | ||
133 | |||
134 | #ifdef CONFIG_HIBERNATION | ||
135 | bool kernel_page_present(struct page *page) | ||
136 | { | ||
137 | unsigned long addr; | ||
138 | int cc; | ||
139 | |||
140 | addr = page_to_phys(page); | ||
141 | asm volatile( | ||
142 | " lra %1,0(%1)\n" | ||
143 | " ipm %0\n" | ||
144 | " srl %0,28" | ||
145 | : "=d" (cc), "+a" (addr) : : "cc"); | ||
146 | return cc == 0; | ||
147 | } | ||
148 | #endif /* CONFIG_HIBERNATION */ | ||
149 | |||
150 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae44d2a3431..529a0883837 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright IBM Corp. 2007, 2011 | 2 | * Copyright IBM Corp. 2007,2009 |
3 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | 3 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | 20 | ||
21 | #include <asm/system.h> | ||
21 | #include <asm/pgtable.h> | 22 | #include <asm/pgtable.h> |
22 | #include <asm/pgalloc.h> | 23 | #include <asm/pgalloc.h> |
23 | #include <asm/tlb.h> | 24 | #include <asm/tlb.h> |
@@ -32,6 +33,17 @@ | |||
32 | #define FRAG_MASK 0x03 | 33 | #define FRAG_MASK 0x03 |
33 | #endif | 34 | #endif |
34 | 35 | ||
36 | unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; | ||
37 | EXPORT_SYMBOL(VMALLOC_START); | ||
38 | |||
39 | static int __init parse_vmalloc(char *arg) | ||
40 | { | ||
41 | if (!arg) | ||
42 | return -EINVAL; | ||
43 | VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK; | ||
44 | return 0; | ||
45 | } | ||
46 | early_param("vmalloc", parse_vmalloc); | ||
35 | 47 | ||
36 | unsigned long *crst_table_alloc(struct mm_struct *mm) | 48 | unsigned long *crst_table_alloc(struct mm_struct *mm) |
37 | { | 49 | { |
@@ -85,6 +97,7 @@ repeat: | |||
85 | crst_table_free(mm, table); | 97 | crst_table_free(mm, table); |
86 | if (mm->context.asce_limit < limit) | 98 | if (mm->context.asce_limit < limit) |
87 | goto repeat; | 99 | goto repeat; |
100 | update_mm(mm, current); | ||
88 | return 0; | 101 | return 0; |
89 | } | 102 | } |
90 | 103 | ||
@@ -92,6 +105,9 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | |||
92 | { | 105 | { |
93 | pgd_t *pgd; | 106 | pgd_t *pgd; |
94 | 107 | ||
108 | if (mm->context.asce_limit <= limit) | ||
109 | return; | ||
110 | __tlb_flush_mm(mm); | ||
95 | while (mm->context.asce_limit > limit) { | 111 | while (mm->context.asce_limit > limit) { |
96 | pgd = mm->pgd; | 112 | pgd = mm->pgd; |
97 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { | 113 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { |
@@ -114,6 +130,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | |||
114 | mm->task_size = mm->context.asce_limit; | 130 | mm->task_size = mm->context.asce_limit; |
115 | crst_table_free(mm, (unsigned long *) pgd); | 131 | crst_table_free(mm, (unsigned long *) pgd); |
116 | } | 132 | } |
133 | update_mm(mm, current); | ||
117 | } | 134 | } |
118 | #endif | 135 | #endif |
119 | 136 | ||
@@ -205,7 +222,6 @@ void gmap_free(struct gmap *gmap) | |||
205 | 222 | ||
206 | /* Free all segment & region tables. */ | 223 | /* Free all segment & region tables. */ |
207 | down_read(&gmap->mm->mmap_sem); | 224 | down_read(&gmap->mm->mmap_sem); |
208 | spin_lock(&gmap->mm->page_table_lock); | ||
209 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { | 225 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { |
210 | table = (unsigned long *) page_to_phys(page); | 226 | table = (unsigned long *) page_to_phys(page); |
211 | if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) | 227 | if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) |
@@ -214,7 +230,6 @@ void gmap_free(struct gmap *gmap) | |||
214 | gmap_unlink_segment(gmap, table); | 230 | gmap_unlink_segment(gmap, table); |
215 | __free_pages(page, ALLOC_ORDER); | 231 | __free_pages(page, ALLOC_ORDER); |
216 | } | 232 | } |
217 | spin_unlock(&gmap->mm->page_table_lock); | ||
218 | up_read(&gmap->mm->mmap_sem); | 233 | up_read(&gmap->mm->mmap_sem); |
219 | list_del(&gmap->list); | 234 | list_del(&gmap->list); |
220 | kfree(gmap); | 235 | kfree(gmap); |
@@ -241,29 +256,25 @@ void gmap_disable(struct gmap *gmap) | |||
241 | } | 256 | } |
242 | EXPORT_SYMBOL_GPL(gmap_disable); | 257 | EXPORT_SYMBOL_GPL(gmap_disable); |
243 | 258 | ||
244 | /* | ||
245 | * gmap_alloc_table is assumed to be called with mmap_sem held | ||
246 | */ | ||
247 | static int gmap_alloc_table(struct gmap *gmap, | 259 | static int gmap_alloc_table(struct gmap *gmap, |
248 | unsigned long *table, unsigned long init) | 260 | unsigned long *table, unsigned long init) |
249 | { | 261 | { |
250 | struct page *page; | 262 | struct page *page; |
251 | unsigned long *new; | 263 | unsigned long *new; |
252 | 264 | ||
253 | /* since we dont free the gmap table until gmap_free we can unlock */ | ||
254 | spin_unlock(&gmap->mm->page_table_lock); | ||
255 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 265 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); |
256 | spin_lock(&gmap->mm->page_table_lock); | ||
257 | if (!page) | 266 | if (!page) |
258 | return -ENOMEM; | 267 | return -ENOMEM; |
259 | new = (unsigned long *) page_to_phys(page); | 268 | new = (unsigned long *) page_to_phys(page); |
260 | crst_table_init(new, init); | 269 | crst_table_init(new, init); |
270 | down_read(&gmap->mm->mmap_sem); | ||
261 | if (*table & _REGION_ENTRY_INV) { | 271 | if (*table & _REGION_ENTRY_INV) { |
262 | list_add(&page->lru, &gmap->crst_list); | 272 | list_add(&page->lru, &gmap->crst_list); |
263 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | | 273 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | |
264 | (*table & _REGION_ENTRY_TYPE_MASK); | 274 | (*table & _REGION_ENTRY_TYPE_MASK); |
265 | } else | 275 | } else |
266 | __free_pages(page, ALLOC_ORDER); | 276 | __free_pages(page, ALLOC_ORDER); |
277 | up_read(&gmap->mm->mmap_sem); | ||
267 | return 0; | 278 | return 0; |
268 | } | 279 | } |
269 | 280 | ||
@@ -288,7 +299,6 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) | |||
288 | 299 | ||
289 | flush = 0; | 300 | flush = 0; |
290 | down_read(&gmap->mm->mmap_sem); | 301 | down_read(&gmap->mm->mmap_sem); |
291 | spin_lock(&gmap->mm->page_table_lock); | ||
292 | for (off = 0; off < len; off += PMD_SIZE) { | 302 | for (off = 0; off < len; off += PMD_SIZE) { |
293 | /* Walk the guest addr space page table */ | 303 | /* Walk the guest addr space page table */ |
294 | table = gmap->table + (((to + off) >> 53) & 0x7ff); | 304 | table = gmap->table + (((to + off) >> 53) & 0x7ff); |
@@ -310,7 +320,6 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) | |||
310 | *table = _SEGMENT_ENTRY_INV; | 320 | *table = _SEGMENT_ENTRY_INV; |
311 | } | 321 | } |
312 | out: | 322 | out: |
313 | spin_unlock(&gmap->mm->page_table_lock); | ||
314 | up_read(&gmap->mm->mmap_sem); | 323 | up_read(&gmap->mm->mmap_sem); |
315 | if (flush) | 324 | if (flush) |
316 | gmap_flush_tlb(gmap); | 325 | gmap_flush_tlb(gmap); |
@@ -341,7 +350,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, | |||
341 | 350 | ||
342 | flush = 0; | 351 | flush = 0; |
343 | down_read(&gmap->mm->mmap_sem); | 352 | down_read(&gmap->mm->mmap_sem); |
344 | spin_lock(&gmap->mm->page_table_lock); | ||
345 | for (off = 0; off < len; off += PMD_SIZE) { | 353 | for (off = 0; off < len; off += PMD_SIZE) { |
346 | /* Walk the gmap address space page table */ | 354 | /* Walk the gmap address space page table */ |
347 | table = gmap->table + (((to + off) >> 53) & 0x7ff); | 355 | table = gmap->table + (((to + off) >> 53) & 0x7ff); |
@@ -365,24 +373,19 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, | |||
365 | flush |= gmap_unlink_segment(gmap, table); | 373 | flush |= gmap_unlink_segment(gmap, table); |
366 | *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); | 374 | *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); |
367 | } | 375 | } |
368 | spin_unlock(&gmap->mm->page_table_lock); | ||
369 | up_read(&gmap->mm->mmap_sem); | 376 | up_read(&gmap->mm->mmap_sem); |
370 | if (flush) | 377 | if (flush) |
371 | gmap_flush_tlb(gmap); | 378 | gmap_flush_tlb(gmap); |
372 | return 0; | 379 | return 0; |
373 | 380 | ||
374 | out_unmap: | 381 | out_unmap: |
375 | spin_unlock(&gmap->mm->page_table_lock); | ||
376 | up_read(&gmap->mm->mmap_sem); | 382 | up_read(&gmap->mm->mmap_sem); |
377 | gmap_unmap_segment(gmap, to, len); | 383 | gmap_unmap_segment(gmap, to, len); |
378 | return -ENOMEM; | 384 | return -ENOMEM; |
379 | } | 385 | } |
380 | EXPORT_SYMBOL_GPL(gmap_map_segment); | 386 | EXPORT_SYMBOL_GPL(gmap_map_segment); |
381 | 387 | ||
382 | /* | 388 | unsigned long gmap_fault(unsigned long address, struct gmap *gmap) |
383 | * this function is assumed to be called with mmap_sem held | ||
384 | */ | ||
385 | unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) | ||
386 | { | 389 | { |
387 | unsigned long *table, vmaddr, segment; | 390 | unsigned long *table, vmaddr, segment; |
388 | struct mm_struct *mm; | 391 | struct mm_struct *mm; |
@@ -442,75 +445,16 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) | |||
442 | page = pmd_page(*pmd); | 445 | page = pmd_page(*pmd); |
443 | mp = (struct gmap_pgtable *) page->index; | 446 | mp = (struct gmap_pgtable *) page->index; |
444 | rmap->entry = table; | 447 | rmap->entry = table; |
445 | spin_lock(&mm->page_table_lock); | ||
446 | list_add(&rmap->list, &mp->mapper); | 448 | list_add(&rmap->list, &mp->mapper); |
447 | spin_unlock(&mm->page_table_lock); | ||
448 | /* Set gmap segment table entry to page table. */ | 449 | /* Set gmap segment table entry to page table. */ |
449 | *table = pmd_val(*pmd) & PAGE_MASK; | 450 | *table = pmd_val(*pmd) & PAGE_MASK; |
450 | return vmaddr | (address & ~PMD_MASK); | 451 | return vmaddr | (address & ~PMD_MASK); |
451 | } | 452 | } |
452 | return -EFAULT; | 453 | return -EFAULT; |
453 | } | ||
454 | 454 | ||
455 | unsigned long gmap_fault(unsigned long address, struct gmap *gmap) | ||
456 | { | ||
457 | unsigned long rc; | ||
458 | |||
459 | down_read(&gmap->mm->mmap_sem); | ||
460 | rc = __gmap_fault(address, gmap); | ||
461 | up_read(&gmap->mm->mmap_sem); | ||
462 | |||
463 | return rc; | ||
464 | } | 455 | } |
465 | EXPORT_SYMBOL_GPL(gmap_fault); | 456 | EXPORT_SYMBOL_GPL(gmap_fault); |
466 | 457 | ||
467 | void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) | ||
468 | { | ||
469 | |||
470 | unsigned long *table, address, size; | ||
471 | struct vm_area_struct *vma; | ||
472 | struct gmap_pgtable *mp; | ||
473 | struct page *page; | ||
474 | |||
475 | down_read(&gmap->mm->mmap_sem); | ||
476 | address = from; | ||
477 | while (address < to) { | ||
478 | /* Walk the gmap address space page table */ | ||
479 | table = gmap->table + ((address >> 53) & 0x7ff); | ||
480 | if (unlikely(*table & _REGION_ENTRY_INV)) { | ||
481 | address = (address + PMD_SIZE) & PMD_MASK; | ||
482 | continue; | ||
483 | } | ||
484 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
485 | table = table + ((address >> 42) & 0x7ff); | ||
486 | if (unlikely(*table & _REGION_ENTRY_INV)) { | ||
487 | address = (address + PMD_SIZE) & PMD_MASK; | ||
488 | continue; | ||
489 | } | ||
490 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
491 | table = table + ((address >> 31) & 0x7ff); | ||
492 | if (unlikely(*table & _REGION_ENTRY_INV)) { | ||
493 | address = (address + PMD_SIZE) & PMD_MASK; | ||
494 | continue; | ||
495 | } | ||
496 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
497 | table = table + ((address >> 20) & 0x7ff); | ||
498 | if (unlikely(*table & _SEGMENT_ENTRY_INV)) { | ||
499 | address = (address + PMD_SIZE) & PMD_MASK; | ||
500 | continue; | ||
501 | } | ||
502 | page = pfn_to_page(*table >> PAGE_SHIFT); | ||
503 | mp = (struct gmap_pgtable *) page->index; | ||
504 | vma = find_vma(gmap->mm, mp->vmaddr); | ||
505 | size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); | ||
506 | zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), | ||
507 | size, NULL); | ||
508 | address = (address + PMD_SIZE) & PMD_MASK; | ||
509 | } | ||
510 | up_read(&gmap->mm->mmap_sem); | ||
511 | } | ||
512 | EXPORT_SYMBOL_GPL(gmap_discard); | ||
513 | |||
514 | void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) | 458 | void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) |
515 | { | 459 | { |
516 | struct gmap_rmap *rmap, *next; | 460 | struct gmap_rmap *rmap, *next; |
@@ -568,7 +512,7 @@ static inline void page_table_free_pgste(unsigned long *table) | |||
568 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 512 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
569 | mp = (struct gmap_pgtable *) page->index; | 513 | mp = (struct gmap_pgtable *) page->index; |
570 | BUG_ON(!list_empty(&mp->mapper)); | 514 | BUG_ON(!list_empty(&mp->mapper)); |
571 | pgtable_page_dtor(page); | 515 | pgtable_page_ctor(page); |
572 | atomic_set(&page->_mapcount, -1); | 516 | atomic_set(&page->_mapcount, -1); |
573 | kfree(mp); | 517 | kfree(mp); |
574 | __free_page(page); | 518 | __free_page(page); |
@@ -609,8 +553,8 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) | |||
609 | */ | 553 | */ |
610 | unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) | 554 | unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) |
611 | { | 555 | { |
612 | unsigned long *uninitialized_var(table); | 556 | struct page *page; |
613 | struct page *uninitialized_var(page); | 557 | unsigned long *table; |
614 | unsigned int mask, bit; | 558 | unsigned int mask, bit; |
615 | 559 | ||
616 | if (mm_has_pgste(mm)) | 560 | if (mm_has_pgste(mm)) |
@@ -673,6 +617,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
673 | } | 617 | } |
674 | } | 618 | } |
675 | 619 | ||
620 | #ifdef CONFIG_HAVE_RCU_TABLE_FREE | ||
621 | |||
676 | static void __page_table_free_rcu(void *table, unsigned bit) | 622 | static void __page_table_free_rcu(void *table, unsigned bit) |
677 | { | 623 | { |
678 | struct page *page; | 624 | struct page *page; |
@@ -726,90 +672,7 @@ void __tlb_remove_table(void *_table) | |||
726 | free_pages((unsigned long) table, ALLOC_ORDER); | 672 | free_pages((unsigned long) table, ALLOC_ORDER); |
727 | } | 673 | } |
728 | 674 | ||
729 | static void tlb_remove_table_smp_sync(void *arg) | 675 | #endif |
730 | { | ||
731 | /* Simply deliver the interrupt */ | ||
732 | } | ||
733 | |||
734 | static void tlb_remove_table_one(void *table) | ||
735 | { | ||
736 | /* | ||
737 | * This isn't an RCU grace period and hence the page-tables cannot be | ||
738 | * assumed to be actually RCU-freed. | ||
739 | * | ||
740 | * It is however sufficient for software page-table walkers that rely | ||
741 | * on IRQ disabling. See the comment near struct mmu_table_batch. | ||
742 | */ | ||
743 | smp_call_function(tlb_remove_table_smp_sync, NULL, 1); | ||
744 | __tlb_remove_table(table); | ||
745 | } | ||
746 | |||
747 | static void tlb_remove_table_rcu(struct rcu_head *head) | ||
748 | { | ||
749 | struct mmu_table_batch *batch; | ||
750 | int i; | ||
751 | |||
752 | batch = container_of(head, struct mmu_table_batch, rcu); | ||
753 | |||
754 | for (i = 0; i < batch->nr; i++) | ||
755 | __tlb_remove_table(batch->tables[i]); | ||
756 | |||
757 | free_page((unsigned long)batch); | ||
758 | } | ||
759 | |||
760 | void tlb_table_flush(struct mmu_gather *tlb) | ||
761 | { | ||
762 | struct mmu_table_batch **batch = &tlb->batch; | ||
763 | |||
764 | if (*batch) { | ||
765 | __tlb_flush_mm(tlb->mm); | ||
766 | call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); | ||
767 | *batch = NULL; | ||
768 | } | ||
769 | } | ||
770 | |||
771 | void tlb_remove_table(struct mmu_gather *tlb, void *table) | ||
772 | { | ||
773 | struct mmu_table_batch **batch = &tlb->batch; | ||
774 | |||
775 | if (*batch == NULL) { | ||
776 | *batch = (struct mmu_table_batch *) | ||
777 | __get_free_page(GFP_NOWAIT | __GFP_NOWARN); | ||
778 | if (*batch == NULL) { | ||
779 | __tlb_flush_mm(tlb->mm); | ||
780 | tlb_remove_table_one(table); | ||
781 | return; | ||
782 | } | ||
783 | (*batch)->nr = 0; | ||
784 | } | ||
785 | (*batch)->tables[(*batch)->nr++] = table; | ||
786 | if ((*batch)->nr == MAX_TABLE_BATCH) | ||
787 | tlb_table_flush(tlb); | ||
788 | } | ||
789 | |||
790 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
791 | void thp_split_vma(struct vm_area_struct *vma) | ||
792 | { | ||
793 | unsigned long addr; | ||
794 | struct page *page; | ||
795 | |||
796 | for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { | ||
797 | page = follow_page(vma, addr, FOLL_SPLIT); | ||
798 | } | ||
799 | } | ||
800 | |||
801 | void thp_split_mm(struct mm_struct *mm) | ||
802 | { | ||
803 | struct vm_area_struct *vma = mm->mmap; | ||
804 | |||
805 | while (vma != NULL) { | ||
806 | thp_split_vma(vma); | ||
807 | vma->vm_flags &= ~VM_HUGEPAGE; | ||
808 | vma->vm_flags |= VM_NOHUGEPAGE; | ||
809 | vma = vma->vm_next; | ||
810 | } | ||
811 | } | ||
812 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
813 | 676 | ||
814 | /* | 677 | /* |
815 | * switch on pgstes for its userspace process (for kvm) | 678 | * switch on pgstes for its userspace process (for kvm) |
@@ -820,7 +683,7 @@ int s390_enable_sie(void) | |||
820 | struct mm_struct *mm, *old_mm; | 683 | struct mm_struct *mm, *old_mm; |
821 | 684 | ||
822 | /* Do we have switched amode? If no, we cannot do sie */ | 685 | /* Do we have switched amode? If no, we cannot do sie */ |
823 | if (s390_user_mode == HOME_SPACE_MODE) | 686 | if (user_mode == HOME_SPACE_MODE) |
824 | return -EINVAL; | 687 | return -EINVAL; |
825 | 688 | ||
826 | /* Do we have pgstes? if yes, we are done */ | 689 | /* Do we have pgstes? if yes, we are done */ |
@@ -841,19 +704,11 @@ int s390_enable_sie(void) | |||
841 | 704 | ||
842 | /* we copy the mm and let dup_mm create the page tables with_pgstes */ | 705 | /* we copy the mm and let dup_mm create the page tables with_pgstes */ |
843 | tsk->mm->context.alloc_pgste = 1; | 706 | tsk->mm->context.alloc_pgste = 1; |
844 | /* make sure that both mms have a correct rss state */ | ||
845 | sync_mm_rss(tsk->mm); | ||
846 | mm = dup_mm(tsk); | 707 | mm = dup_mm(tsk); |
847 | tsk->mm->context.alloc_pgste = 0; | 708 | tsk->mm->context.alloc_pgste = 0; |
848 | if (!mm) | 709 | if (!mm) |
849 | return -ENOMEM; | 710 | return -ENOMEM; |
850 | 711 | ||
851 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
852 | /* split thp mappings and disable thp for future mappings */ | ||
853 | thp_split_mm(mm); | ||
854 | mm->def_flags |= VM_NOHUGEPAGE; | ||
855 | #endif | ||
856 | |||
857 | /* Now lets check again if something happened */ | 712 | /* Now lets check again if something happened */ |
858 | task_lock(tsk); | 713 | task_lock(tsk); |
859 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 714 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || |
@@ -881,80 +736,18 @@ int s390_enable_sie(void) | |||
881 | } | 736 | } |
882 | EXPORT_SYMBOL_GPL(s390_enable_sie); | 737 | EXPORT_SYMBOL_GPL(s390_enable_sie); |
883 | 738 | ||
884 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 739 | #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION) |
885 | int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, | 740 | bool kernel_page_present(struct page *page) |
886 | pmd_t *pmdp) | ||
887 | { | ||
888 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
889 | /* No need to flush TLB | ||
890 | * On s390 reference bits are in storage key and never in TLB */ | ||
891 | return pmdp_test_and_clear_young(vma, address, pmdp); | ||
892 | } | ||
893 | |||
894 | int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
895 | unsigned long address, pmd_t *pmdp, | ||
896 | pmd_t entry, int dirty) | ||
897 | { | 741 | { |
898 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | 742 | unsigned long addr; |
899 | 743 | int cc; | |
900 | if (pmd_same(*pmdp, entry)) | ||
901 | return 0; | ||
902 | pmdp_invalidate(vma, address, pmdp); | ||
903 | set_pmd_at(vma->vm_mm, address, pmdp, entry); | ||
904 | return 1; | ||
905 | } | ||
906 | |||
907 | static void pmdp_splitting_flush_sync(void *arg) | ||
908 | { | ||
909 | /* Simply deliver the interrupt */ | ||
910 | } | ||
911 | |||
912 | void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, | ||
913 | pmd_t *pmdp) | ||
914 | { | ||
915 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
916 | if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, | ||
917 | (unsigned long *) pmdp)) { | ||
918 | /* need to serialize against gup-fast (IRQ disabled) */ | ||
919 | smp_call_function(pmdp_splitting_flush_sync, NULL, 1); | ||
920 | } | ||
921 | } | ||
922 | |||
923 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | ||
924 | { | ||
925 | struct list_head *lh = (struct list_head *) pgtable; | ||
926 | |||
927 | assert_spin_locked(&mm->page_table_lock); | ||
928 | |||
929 | /* FIFO */ | ||
930 | if (!mm->pmd_huge_pte) | ||
931 | INIT_LIST_HEAD(lh); | ||
932 | else | ||
933 | list_add(lh, (struct list_head *) mm->pmd_huge_pte); | ||
934 | mm->pmd_huge_pte = pgtable; | ||
935 | } | ||
936 | 744 | ||
937 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) | 745 | addr = page_to_phys(page); |
938 | { | 746 | asm volatile( |
939 | struct list_head *lh; | 747 | " lra %1,0(%1)\n" |
940 | pgtable_t pgtable; | 748 | " ipm %0\n" |
941 | pte_t *ptep; | 749 | " srl %0,28" |
942 | 750 | : "=d" (cc), "+a" (addr) : : "cc"); | |
943 | assert_spin_locked(&mm->page_table_lock); | 751 | return cc == 0; |
944 | |||
945 | /* FIFO */ | ||
946 | pgtable = mm->pmd_huge_pte; | ||
947 | lh = (struct list_head *) pgtable; | ||
948 | if (list_empty(lh)) | ||
949 | mm->pmd_huge_pte = NULL; | ||
950 | else { | ||
951 | mm->pmd_huge_pte = (pgtable_t) lh->next; | ||
952 | list_del(lh); | ||
953 | } | ||
954 | ptep = (pte_t *) pgtable; | ||
955 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
956 | ptep++; | ||
957 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
958 | return pgtable; | ||
959 | } | 752 | } |
960 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 753 | #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */ |
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 6ed1426d27c..781ff516956 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c | |||
@@ -1,4 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * arch/s390/mm/vmem.c | ||
3 | * | ||
2 | * Copyright IBM Corp. 2006 | 4 | * Copyright IBM Corp. 2006 |
3 | * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> | 5 | * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> |
4 | */ | 6 | */ |
@@ -79,8 +81,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) | |||
79 | */ | 81 | */ |
80 | static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | 82 | static int vmem_add_mem(unsigned long start, unsigned long size, int ro) |
81 | { | 83 | { |
82 | unsigned long end = start + size; | 84 | unsigned long address; |
83 | unsigned long address = start; | ||
84 | pgd_t *pg_dir; | 85 | pgd_t *pg_dir; |
85 | pud_t *pu_dir; | 86 | pud_t *pu_dir; |
86 | pmd_t *pm_dir; | 87 | pmd_t *pm_dir; |
@@ -88,8 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
88 | pte_t pte; | 89 | pte_t pte; |
89 | int ret = -ENOMEM; | 90 | int ret = -ENOMEM; |
90 | 91 | ||
91 | while (address < end) { | 92 | for (address = start; address < start + size; address += PAGE_SIZE) { |
92 | pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); | ||
93 | pg_dir = pgd_offset_k(address); | 93 | pg_dir = pgd_offset_k(address); |
94 | if (pgd_none(*pg_dir)) { | 94 | if (pgd_none(*pg_dir)) { |
95 | pu_dir = vmem_pud_alloc(); | 95 | pu_dir = vmem_pud_alloc(); |
@@ -97,30 +97,25 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
97 | goto out; | 97 | goto out; |
98 | pgd_populate(&init_mm, pg_dir, pu_dir); | 98 | pgd_populate(&init_mm, pg_dir, pu_dir); |
99 | } | 99 | } |
100 | |||
100 | pu_dir = pud_offset(pg_dir, address); | 101 | pu_dir = pud_offset(pg_dir, address); |
101 | #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) | ||
102 | if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && | ||
103 | !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { | ||
104 | pte_val(pte) |= _REGION3_ENTRY_LARGE; | ||
105 | pte_val(pte) |= _REGION_ENTRY_TYPE_R3; | ||
106 | pud_val(*pu_dir) = pte_val(pte); | ||
107 | address += PUD_SIZE; | ||
108 | continue; | ||
109 | } | ||
110 | #endif | ||
111 | if (pud_none(*pu_dir)) { | 102 | if (pud_none(*pu_dir)) { |
112 | pm_dir = vmem_pmd_alloc(); | 103 | pm_dir = vmem_pmd_alloc(); |
113 | if (!pm_dir) | 104 | if (!pm_dir) |
114 | goto out; | 105 | goto out; |
115 | pud_populate(&init_mm, pu_dir, pm_dir); | 106 | pud_populate(&init_mm, pu_dir, pm_dir); |
116 | } | 107 | } |
108 | |||
109 | pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); | ||
117 | pm_dir = pmd_offset(pu_dir, address); | 110 | pm_dir = pmd_offset(pu_dir, address); |
118 | #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) | 111 | |
119 | if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && | 112 | #ifdef __s390x__ |
120 | !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { | 113 | if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && |
114 | (address + HPAGE_SIZE <= start + size) && | ||
115 | (address >= HPAGE_SIZE)) { | ||
121 | pte_val(pte) |= _SEGMENT_ENTRY_LARGE; | 116 | pte_val(pte) |= _SEGMENT_ENTRY_LARGE; |
122 | pmd_val(*pm_dir) = pte_val(pte); | 117 | pmd_val(*pm_dir) = pte_val(pte); |
123 | address += PMD_SIZE; | 118 | address += HPAGE_SIZE - PAGE_SIZE; |
124 | continue; | 119 | continue; |
125 | } | 120 | } |
126 | #endif | 121 | #endif |
@@ -133,11 +128,10 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
133 | 128 | ||
134 | pt_dir = pte_offset_kernel(pm_dir, address); | 129 | pt_dir = pte_offset_kernel(pm_dir, address); |
135 | *pt_dir = pte; | 130 | *pt_dir = pte; |
136 | address += PAGE_SIZE; | ||
137 | } | 131 | } |
138 | ret = 0; | 132 | ret = 0; |
139 | out: | 133 | out: |
140 | flush_tlb_kernel_range(start, end); | 134 | flush_tlb_kernel_range(start, start + size); |
141 | return ret; | 135 | return ret; |
142 | } | 136 | } |
143 | 137 | ||
@@ -147,8 +141,7 @@ out: | |||
147 | */ | 141 | */ |
148 | static void vmem_remove_range(unsigned long start, unsigned long size) | 142 | static void vmem_remove_range(unsigned long start, unsigned long size) |
149 | { | 143 | { |
150 | unsigned long end = start + size; | 144 | unsigned long address; |
151 | unsigned long address = start; | ||
152 | pgd_t *pg_dir; | 145 | pgd_t *pg_dir; |
153 | pud_t *pu_dir; | 146 | pud_t *pu_dir; |
154 | pmd_t *pm_dir; | 147 | pmd_t *pm_dir; |
@@ -156,37 +149,25 @@ static void vmem_remove_range(unsigned long start, unsigned long size) | |||
156 | pte_t pte; | 149 | pte_t pte; |
157 | 150 | ||
158 | pte_val(pte) = _PAGE_TYPE_EMPTY; | 151 | pte_val(pte) = _PAGE_TYPE_EMPTY; |
159 | while (address < end) { | 152 | for (address = start; address < start + size; address += PAGE_SIZE) { |
160 | pg_dir = pgd_offset_k(address); | 153 | pg_dir = pgd_offset_k(address); |
161 | if (pgd_none(*pg_dir)) { | ||
162 | address += PGDIR_SIZE; | ||
163 | continue; | ||
164 | } | ||
165 | pu_dir = pud_offset(pg_dir, address); | 154 | pu_dir = pud_offset(pg_dir, address); |
166 | if (pud_none(*pu_dir)) { | 155 | if (pud_none(*pu_dir)) |
167 | address += PUD_SIZE; | ||
168 | continue; | 156 | continue; |
169 | } | ||
170 | if (pud_large(*pu_dir)) { | ||
171 | pud_clear(pu_dir); | ||
172 | address += PUD_SIZE; | ||
173 | continue; | ||
174 | } | ||
175 | pm_dir = pmd_offset(pu_dir, address); | 157 | pm_dir = pmd_offset(pu_dir, address); |
176 | if (pmd_none(*pm_dir)) { | 158 | if (pmd_none(*pm_dir)) |
177 | address += PMD_SIZE; | ||
178 | continue; | 159 | continue; |
179 | } | 160 | |
180 | if (pmd_large(*pm_dir)) { | 161 | if (pmd_huge(*pm_dir)) { |
181 | pmd_clear(pm_dir); | 162 | pmd_clear(pm_dir); |
182 | address += PMD_SIZE; | 163 | address += HPAGE_SIZE - PAGE_SIZE; |
183 | continue; | 164 | continue; |
184 | } | 165 | } |
166 | |||
185 | pt_dir = pte_offset_kernel(pm_dir, address); | 167 | pt_dir = pte_offset_kernel(pm_dir, address); |
186 | *pt_dir = pte; | 168 | *pt_dir = pte; |
187 | address += PAGE_SIZE; | ||
188 | } | 169 | } |
189 | flush_tlb_kernel_range(start, end); | 170 | flush_tlb_kernel_range(start, start + size); |
190 | } | 171 | } |
191 | 172 | ||
192 | /* | 173 | /* |
@@ -205,7 +186,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | |||
205 | start_addr = (unsigned long) start; | 186 | start_addr = (unsigned long) start; |
206 | end_addr = (unsigned long) (start + nr); | 187 | end_addr = (unsigned long) (start + nr); |
207 | 188 | ||
208 | for (address = start_addr; address < end_addr;) { | 189 | for (address = start_addr; address < end_addr; address += PAGE_SIZE) { |
209 | pg_dir = pgd_offset_k(address); | 190 | pg_dir = pgd_offset_k(address); |
210 | if (pgd_none(*pg_dir)) { | 191 | if (pgd_none(*pg_dir)) { |
211 | pu_dir = vmem_pud_alloc(); | 192 | pu_dir = vmem_pud_alloc(); |
@@ -224,33 +205,10 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | |||
224 | 205 | ||
225 | pm_dir = pmd_offset(pu_dir, address); | 206 | pm_dir = pmd_offset(pu_dir, address); |
226 | if (pmd_none(*pm_dir)) { | 207 | if (pmd_none(*pm_dir)) { |
227 | #ifdef CONFIG_64BIT | ||
228 | /* Use 1MB frames for vmemmap if available. We always | ||
229 | * use large frames even if they are only partially | ||
230 | * used. | ||
231 | * Otherwise we would have also page tables since | ||
232 | * vmemmap_populate gets called for each section | ||
233 | * separately. */ | ||
234 | if (MACHINE_HAS_EDAT1) { | ||
235 | void *new_page; | ||
236 | |||
237 | new_page = vmemmap_alloc_block(PMD_SIZE, node); | ||
238 | if (!new_page) | ||
239 | goto out; | ||
240 | pte = mk_pte_phys(__pa(new_page), PAGE_RW); | ||
241 | pte_val(pte) |= _SEGMENT_ENTRY_LARGE; | ||
242 | pmd_val(*pm_dir) = pte_val(pte); | ||
243 | address = (address + PMD_SIZE) & PMD_MASK; | ||
244 | continue; | ||
245 | } | ||
246 | #endif | ||
247 | pt_dir = vmem_pte_alloc(address); | 208 | pt_dir = vmem_pte_alloc(address); |
248 | if (!pt_dir) | 209 | if (!pt_dir) |
249 | goto out; | 210 | goto out; |
250 | pmd_populate(&init_mm, pm_dir, pt_dir); | 211 | pmd_populate(&init_mm, pm_dir, pt_dir); |
251 | } else if (pmd_large(*pm_dir)) { | ||
252 | address = (address + PMD_SIZE) & PMD_MASK; | ||
253 | continue; | ||
254 | } | 212 | } |
255 | 213 | ||
256 | pt_dir = pte_offset_kernel(pm_dir, address); | 214 | pt_dir = pte_offset_kernel(pm_dir, address); |
@@ -263,7 +221,6 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | |||
263 | pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); | 221 | pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); |
264 | *pt_dir = pte; | 222 | *pt_dir = pte; |
265 | } | 223 | } |
266 | address += PAGE_SIZE; | ||
267 | } | 224 | } |
268 | memset(start, 0, nr * sizeof(struct page)); | 225 | memset(start, 0, nr * sizeof(struct page)); |
269 | ret = 0; | 226 | ret = 0; |
@@ -375,12 +332,9 @@ void __init vmem_map_init(void) | |||
375 | unsigned long start, end; | 332 | unsigned long start, end; |
376 | int i; | 333 | int i; |
377 | 334 | ||
378 | ro_start = PFN_ALIGN((unsigned long)&_stext); | 335 | ro_start = ((unsigned long)&_stext) & PAGE_MASK; |
379 | ro_end = (unsigned long)&_eshared & PAGE_MASK; | 336 | ro_end = PFN_ALIGN((unsigned long)&_eshared); |
380 | for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { | 337 | for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { |
381 | if (memory_chunk[i].type == CHUNK_CRASHK || | ||
382 | memory_chunk[i].type == CHUNK_OLDMEM) | ||
383 | continue; | ||
384 | start = memory_chunk[i].addr; | 338 | start = memory_chunk[i].addr; |
385 | end = memory_chunk[i].addr + memory_chunk[i].size; | 339 | end = memory_chunk[i].addr + memory_chunk[i].size; |
386 | if (start >= ro_end || end <= ro_start) | 340 | if (start >= ro_end || end <= ro_start) |
@@ -414,9 +368,6 @@ static int __init vmem_convert_memory_chunk(void) | |||
414 | for (i = 0; i < MEMORY_CHUNKS; i++) { | 368 | for (i = 0; i < MEMORY_CHUNKS; i++) { |
415 | if (!memory_chunk[i].size) | 369 | if (!memory_chunk[i].size) |
416 | continue; | 370 | continue; |
417 | if (memory_chunk[i].type == CHUNK_CRASHK || | ||
418 | memory_chunk[i].type == CHUNK_OLDMEM) | ||
419 | continue; | ||
420 | seg = kzalloc(sizeof(*seg), GFP_KERNEL); | 371 | seg = kzalloc(sizeof(*seg), GFP_KERNEL); |
421 | if (!seg) | 372 | if (!seg) |
422 | panic("Out of memory...\n"); | 373 | panic("Out of memory...\n"); |