author		Heiko Carstens <heiko.carstens@de.ibm.com>	2016-05-17 04:50:15 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2016-06-13 09:58:15 -0400
commit		e8a97e42dc986a081017b1e77e3a3c7f02a0a638 (patch)
tree		fc5e755386a6d086d4436a4a4eea7eba2cd48a7c /arch/s390/mm
parent		9e20b4dac1f58921503109ea38f341ff2b0d21f5 (diff)
s390/pageattr: allow kernel page table splitting
set_memory_ro() and set_memory_rw() currently only work on 4k mappings, which is good enough for module code aka the vmalloc area. However we have already stumbled twice into the need to make this also work on larger mappings:
- the ro after init patch set
- the crash kernel resize code

Therefore this patch implements automatic kernel page table splitting if e.g. set_memory_ro() would be called on parts of a 2G mapping. This works quite the same as the x86 code, but is much simpler.

In order to make this work and to be architecturally compliant we now always use the csp, cspg or crdte instructions to replace valid page table entries. This means that set_memory_ro() and set_memory_rw() will be much more expensive than before. In order to avoid huge latencies the code contains a couple of cond_resched() calls.

The current code only splits page tables, but does not merge them if that would be possible. The reason for this is that currently there is no real life scenario where this would really happen. All current use cases that I know of only change access rights once during their lifetime. If that should change we can still implement kernel page table merging at a later time.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
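For illustration only, a minimal caller sketch (not part of the patch; the helper name and region are hypothetical) of how a user such as the crash kernel resize path could use the interface after this change. The splitting of 1M/2G mappings happens transparently inside set_memory_ro()/set_memory_rw(); the caller only passes a page-aligned start address and a number of 4k pages:

/*
 * Hypothetical example: write-protect a page-aligned region that may be
 * backed by large kernel mappings.  The caller does not need to care
 * about the mapping size; set_memory_ro() splits the mapping as needed.
 */
static int protect_region_example(unsigned long start, unsigned long size)
{
	/* the address is masked and the range walked inside change_page_attr() */
	return set_memory_ro(start, size >> PAGE_SHIFT);
}

set_memory_rw() undoes the protection the same way; note that with this patch both functions return an error code (e.g. -EINVAL or -ENOMEM) instead of unconditionally returning 0.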
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--	arch/s390/mm/pageattr.c	243
-rw-r--r--	arch/s390/mm/vmem.c	4
2 files changed, 214 insertions, 33 deletions
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index e67a8f712e19..91e5e29c1f5c 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -40,54 +40,235 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 }
 #endif
 
-static pte_t *walk_page_table(unsigned long addr)
+static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
+		    unsigned long dtt)
 {
-	pgd_t *pgdp;
-	pud_t *pudp;
+	unsigned long table, mask;
+
+	mask = 0;
+	if (MACHINE_HAS_EDAT2) {
+		switch (dtt) {
+		case CRDTE_DTT_REGION3:
+			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
+			break;
+		case CRDTE_DTT_SEGMENT:
+			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+			break;
+		case CRDTE_DTT_PAGE:
+			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+			break;
+		}
+		table = (unsigned long)old & mask;
+		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
+	} else if (MACHINE_HAS_IDTE) {
+		cspg(old, *old, new);
+	} else {
+		csp((unsigned int *)old + 1, *old, new);
+	}
+}
+
+struct cpa {
+	unsigned int set_ro	: 1;
+	unsigned int clear_ro	: 1;
+};
+
+static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
+			  struct cpa cpa)
+{
+	pte_t *ptep, new;
+
+	ptep = pte_offset(pmdp, addr);
+	do {
+		if (pte_none(*ptep))
+			return -EINVAL;
+		if (cpa.set_ro)
+			new = pte_wrprotect(*ptep);
+		else if (cpa.clear_ro)
+			new = pte_mkwrite(pte_mkdirty(*ptep));
+		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
+		ptep++;
+		addr += PAGE_SIZE;
+		cond_resched();
+	} while (addr < end);
+	return 0;
+}
+
+static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
+{
+	unsigned long pte_addr, prot;
+	pte_t *pt_dir, *ptep;
+	pmd_t new;
+	int i, ro;
+
+	pt_dir = vmem_pte_alloc();
+	if (!pt_dir)
+		return -ENOMEM;
+	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
+	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
+	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+	ptep = pt_dir;
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte_val(*ptep) = pte_addr | prot;
+		pte_addr += PAGE_SIZE;
+		ptep++;
+	}
+	pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY;
+	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+	return 0;
+}
+
+static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa)
+{
+	pmd_t new;
+
+	if (cpa.set_ro)
+		new = pmd_wrprotect(*pmdp);
+	else if (cpa.clear_ro)
+		new = pmd_mkwrite(pmd_mkdirty(*pmdp));
+	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+}
+
+static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
+			  struct cpa cpa)
+{
+	unsigned long next;
 	pmd_t *pmdp;
-	pte_t *ptep;
+	int rc = 0;
 
-	pgdp = pgd_offset_k(addr);
-	if (pgd_none(*pgdp))
-		return NULL;
-	pudp = pud_offset(pgdp, addr);
-	if (pud_none(*pudp) || pud_large(*pudp))
-		return NULL;
 	pmdp = pmd_offset(pudp, addr);
-	if (pmd_none(*pmdp) || pmd_large(*pmdp))
-		return NULL;
-	ptep = pte_offset_kernel(pmdp, addr);
-	if (pte_none(*ptep))
-		return NULL;
-	return ptep;
+	do {
+		if (pmd_none(*pmdp))
+			return -EINVAL;
+		next = pmd_addr_end(addr, end);
+		if (pmd_large(*pmdp)) {
+			if (addr & ~PMD_MASK || addr + PMD_SIZE > next) {
+				rc = split_pmd_page(pmdp, addr);
+				if (rc)
+					return rc;
+				continue;
+			}
+			modify_pmd_page(pmdp, addr, cpa);
+		} else {
+			rc = walk_pte_level(pmdp, addr, next, cpa);
+			if (rc)
+				return rc;
+		}
+		pmdp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end);
+	return rc;
 }
 
-static void change_page_attr(unsigned long addr, int numpages,
-			     pte_t (*set) (pte_t))
+static int split_pud_page(pud_t *pudp, unsigned long addr)
 {
-	pte_t *ptep;
-	int i;
+	unsigned long pmd_addr, prot;
+	pmd_t *pm_dir, *pmdp;
+	pud_t new;
+	int i, ro;
 
-	for (i = 0; i < numpages; i++) {
-		ptep = walk_page_table(addr);
-		if (WARN_ON_ONCE(!ptep))
-			break;
-		*ptep = set(*ptep);
-		addr += PAGE_SIZE;
+	pm_dir = vmem_pmd_alloc();
+	if (!pm_dir)
+		return -ENOMEM;
+	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
+	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
+	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
+	pmdp = pm_dir;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd_val(*pmdp) = pmd_addr | prot;
+		pmd_addr += PMD_SIZE;
+		pmdp++;
 	}
-	__tlb_flush_kernel();
+	pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY;
+	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+	return 0;
+}
+
+static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa)
+{
+	pud_t new;
+
+	if (cpa.set_ro)
+		new = pud_wrprotect(*pudp);
+	else if (cpa.clear_ro)
+		new = pud_mkwrite(pud_mkdirty(*pudp));
+	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+}
+
+static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+			  struct cpa cpa)
+{
+	unsigned long next;
+	pud_t *pudp;
+	int rc = 0;
+
+	pudp = pud_offset(pgd, addr);
+	do {
+		if (pud_none(*pudp))
+			return -EINVAL;
+		next = pud_addr_end(addr, end);
+		if (pud_large(*pudp)) {
+			if (addr & ~PUD_MASK || addr + PUD_SIZE > next) {
+				rc = split_pud_page(pudp, addr);
+				if (rc)
+					break;
+				continue;
+			}
+			modify_pud_page(pudp, addr, cpa);
+		} else {
+			rc = walk_pmd_level(pudp, addr, next, cpa);
+		}
+		pudp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end && !rc);
+	return rc;
+}
+
+static DEFINE_MUTEX(cpa_mutex);
+
+static int change_page_attr(unsigned long addr, unsigned long end,
+			    struct cpa cpa)
+{
+	unsigned long next;
+	int rc = -EINVAL;
+	pgd_t *pgdp;
+
+	if (end >= MODULES_END)
+		return -EINVAL;
+	mutex_lock(&cpa_mutex);
+	pgdp = pgd_offset_k(addr);
+	do {
+		if (pgd_none(*pgdp))
+			break;
+		next = pgd_addr_end(addr, end);
+		rc = walk_pud_level(pgdp, addr, next, cpa);
+		if (rc)
+			break;
+		cond_resched();
+	} while (pgdp++, addr = next, addr < end && !rc);
+	mutex_unlock(&cpa_mutex);
+	return rc;
 }
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-	change_page_attr(addr, numpages, pte_wrprotect);
-	return 0;
+	struct cpa cpa = {
+		.set_ro = 1,
+	};
+
+	addr &= PAGE_MASK;
+	return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
 }
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-	change_page_attr(addr, numpages, pte_mkwrite);
-	return 0;
+	struct cpa cpa = {
+		.clear_ro = 1,
+	};
+
+	addr &= PAGE_MASK;
+	return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
 }
 
 /* not possible */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 4badd8252e3c..0a7b03496f67 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -47,7 +47,7 @@ static inline pud_t *vmem_pud_alloc(void)
 	return pud;
 }
 
-static inline pmd_t *vmem_pmd_alloc(void)
+pmd_t *vmem_pmd_alloc(void)
 {
 	pmd_t *pmd = NULL;
 
@@ -58,7 +58,7 @@ static inline pmd_t *vmem_pmd_alloc(void)
 	return pmd;
 }
 
-static pte_t __ref *vmem_pte_alloc(void)
+pte_t __ref *vmem_pte_alloc(void)
 {
 	pte_t *pte;
 