author	Linus Torvalds <torvalds@linux-foundation.org>	2008-02-04 12:16:03 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-02-04 12:16:03 -0500
commit	d2fc0bacd5c438cb459fdf531eff00ab18422a00 (patch)
tree	d0ea52e4d2ad2fac12e19eaf6891c6af98353cfc /arch/x86/mm/pageattr.c
parent	93890b71a34f9490673a6edd56b61c2124215e46 (diff)
parent	795d45b22c079946332bf3825afefe5a981a97b6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86
* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (78 commits)
  x86: fix RTC lockdep warning: potential hardirq recursion
  x86: cpa, micro-optimization
  x86: cpa, clean up code flow
  x86: cpa, eliminate CPA_ enum
  x86: cpa, cleanups
  x86: implement gbpages support in change_page_attr()
  x86: support gbpages in pagetable dump
  x86: add gbpages support to lookup_address
  x86: add pgtable accessor functions for gbpages
  x86: add PUD_PAGE_SIZE
  x86: add feature macros for the gbpages cpuid bit
  x86: switch direct mapping setup over to set_pte
  x86: fix page-present check in cpa_flush_range
  x86: remove cpa warning
  x86: remove now unused clear_kernel_mapping
  x86: switch pci-gart over to using set_memory_np() instead of clear_kernel_mapping()
  x86: cpa selftest, skip non present entries
  x86: CPA fix pagetable split
  x86: rename LARGE_PAGE_SIZE to PMD_PAGE_SIZE
  x86: cpa, fix lookup_address
  ...
Diffstat (limited to 'arch/x86/mm/pageattr.c')
-rw-r--r--	arch/x86/mm/pageattr.c | 400
 1 file changed, 317 insertions(+), 83 deletions(-)
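
Before the diff itself, a word of orientation: everything below is the backend of the set_memory_*/set_pages_* API that this file exports. A minimal sketch of a typical caller, assuming the set_memory_ro()/set_memory_rw() wrappers declared in asm/cacheflush.h in this era of the tree; the buffer address and page count are hypothetical:

#include <asm/cacheflush.h>	/* set_memory_ro(), set_memory_rw() */

/* Hedged sketch, not part of this commit: write-protect a buffer and
 * restore it, driving change_page_attr_set_clr() via its wrappers. */
static int protect_hypothetical_buf(unsigned long addr, int numpages)
{
	int err;

	/* Clears _PAGE_RW on each 4k page; may split a 2M/1G mapping: */
	err = set_memory_ro(addr, numpages);
	if (err)
		return err;

	/* ... the buffer is read-only here ... */

	/* Sets _PAGE_RW again; a fully covered large page is kept intact: */
	return set_memory_rw(addr, numpages);
}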
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e297bd65e51..bb55a78dcd6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,17 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+	unsigned long	vaddr;
+	pgprot_t	mask_set;
+	pgprot_t	mask_clr;
+	int		numpages;
+	int		flushtlb;
+};
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -52,21 +63,23 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 
 static void __cpa_flush_all(void *arg)
 {
+	unsigned long cache = (unsigned long)arg;
+
 	/*
 	 * Flush all to work around Errata in early athlons regarding
 	 * large page flushing.
 	 */
 	__flush_tlb_all();
 
-	if (boot_cpu_data.x86_model >= 4)
+	if (cache && boot_cpu_data.x86_model >= 4)
 		wbinvd();
 }
 
-static void cpa_flush_all(void)
+static void cpa_flush_all(unsigned long cache)
 {
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(__cpa_flush_all, NULL, 1, 1);
+	on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
 }
 
 static void __cpa_flush_range(void *arg)
@@ -79,7 +92,7 @@ static void __cpa_flush_range(void *arg)
 	__flush_tlb_all();
 }
 
-static void cpa_flush_range(unsigned long start, int numpages)
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
 {
 	unsigned int i, level;
 	unsigned long addr;
@@ -89,6 +102,9 @@ static void cpa_flush_range(unsigned long start, int numpages)
 
 	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
 
+	if (!cache)
+		return;
+
 	/*
 	 * We only need to flush on one CPU,
 	 * clflush is a MESI-coherent instruction that
@@ -101,11 +117,27 @@ static void cpa_flush_range(unsigned long start, int numpages)
 	/*
 	 * Only flush present addresses:
 	 */
-	if (pte && pte_present(*pte))
+	if (pte && (pte_val(*pte) & _PAGE_PRESENT))
 		clflush_cache_range((void *) addr, PAGE_SIZE);
 	}
 }
 
+#define HIGH_MAP_START	__START_KERNEL_map
+#define HIGH_MAP_END	(__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
+
+/*
+ * Converts a virtual address to a X86-64 highmap address
+ */
+static unsigned long virt_to_highmap(void *address)
+{
+#ifdef CONFIG_X86_64
+	return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
+#else
+	return (unsigned long)address;
+#endif
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
@@ -129,12 +161,24 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	 */
 	if (within(address, (unsigned long)_text, (unsigned long)_etext))
 		pgprot_val(forbidden) |= _PAGE_NX;
+	/*
+	 * Do the same for the x86-64 high kernel mapping
+	 */
+	if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+		pgprot_val(forbidden) |= _PAGE_NX;
+
 
 #ifdef CONFIG_DEBUG_RODATA
 	/* The .rodata section needs to be read-only */
 	if (within(address, (unsigned long)__start_rodata,
 		   (unsigned long)__end_rodata))
 		pgprot_val(forbidden) |= _PAGE_RW;
+	/*
+	 * Do the same for the x86-64 high kernel mapping
+	 */
+	if (within(address, virt_to_highmap(__start_rodata),
+		   virt_to_highmap(__end_rodata)))
+		pgprot_val(forbidden) |= _PAGE_RW;
 #endif
 
 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -142,6 +186,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	return prot;
 }
 
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
 pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -152,21 +204,31 @@ pte_t *lookup_address(unsigned long address, int *level)
 
 	if (pgd_none(*pgd))
 		return NULL;
+
 	pud = pud_offset(pgd, address);
 	if (pud_none(*pud))
 		return NULL;
+
+	*level = PG_LEVEL_1G;
+	if (pud_large(*pud) || !pud_present(*pud))
+		return (pte_t *)pud;
+
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		return NULL;
 
 	*level = PG_LEVEL_2M;
-	if (pmd_large(*pmd))
+	if (pmd_large(*pmd) || !pmd_present(*pmd))
 		return (pte_t *)pmd;
 
 	*level = PG_LEVEL_4K;
+
 	return pte_offset_kernel(pmd, address);
 }
 
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	/* change init_mm */
@@ -175,6 +237,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	if (!SHARED_KERNEL_PMD) {
 		struct page *page;
 
+		address = __pa(address);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			pud_t *pud;
@@ -189,18 +252,114 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 #endif
 }
 
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+			struct cpa_data *cpa)
+{
+	unsigned long nextpage_addr, numpages, pmask, psize, flags;
+	pte_t new_pte, old_pte, *tmp;
+	pgprot_t old_prot, new_prot;
+	int level, do_split = 1;
+
+	/*
+	 * An Athlon 64 X2 showed hard hangs if we tried to preserve
+	 * largepages and changed the PSE entry from RW to RO.
+	 *
+	 * As AMD CPUs have a long series of erratas in this area,
+	 * (and none of the known ones seem to explain this hang),
+	 * disable this code until the hang can be debugged:
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return 1;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	switch (level) {
+	case PG_LEVEL_2M:
+		psize = PMD_PAGE_SIZE;
+		pmask = PMD_PAGE_MASK;
+		break;
+#ifdef CONFIG_X86_64
+	case PG_LEVEL_1G:
+		psize = PMD_PAGE_SIZE;
+		pmask = PMD_PAGE_MASK;
+		break;
+#endif
+	default:
+		do_split = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Calculate the number of pages, which fit into this large
+	 * page starting at address:
+	 */
+	nextpage_addr = (address + psize) & pmask;
+	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
+
+	/*
+	 * We are safe now. Check whether the new pgprot is the same:
+	 */
+	old_pte = *kpte;
+	old_prot = new_prot = pte_pgprot(old_pte);
+
+	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	new_prot = static_protections(new_prot, address);
+
+	/*
+	 * If there are no changes, return. maxpages has been updated
+	 * above:
+	 */
+	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+		do_split = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * We need to change the attributes. Check, whether we can
+	 * change the large page in one go. We request a split, when
+	 * the address is not aligned and the number of pages is
+	 * smaller than the number of pages in the large page. Note
+	 * that we limited the number of possible pages already to
+	 * the number of pages in the large page.
+	 */
+	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+		/*
+		 * The address is aligned and the number of pages
+		 * covers the full page.
+		 */
+		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+		__set_pmd_pte(kpte, address, new_pte);
+		cpa->flushtlb = 1;
+		do_split = 0;
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	return do_split;
+}
+
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	unsigned long flags, pfn, pfninc = 1;
 	gfp_t gfp_flags = GFP_KERNEL;
-	unsigned long flags;
-	unsigned long addr;
+	unsigned int i, level;
 	pte_t *pbase, *tmp;
+	pgprot_t ref_prot;
 	struct page *base;
-	unsigned int i, level;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
 	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
 #endif
 	base = alloc_pages(gfp_flags, 0);
@@ -213,30 +372,41 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 * up for us already:
 	 */
 	tmp = lookup_address(address, &level);
-	if (tmp != kpte) {
-		WARN_ON_ONCE(1);
+	if (tmp != kpte)
 		goto out_unlock;
-	}
 
-	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
 #ifdef CONFIG_X86_32
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 #endif
+	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+
+#ifdef CONFIG_X86_64
+	if (level == PG_LEVEL_1G) {
+		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+		pgprot_val(ref_prot) |= _PAGE_PSE;
+	}
+#endif
 
-	pgprot_val(ref_prot) &= ~_PAGE_NX;
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+	/*
+	 * Get the target pfn from the original entry:
+	 */
+	pfn = pte_pfn(*kpte);
+	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	/*
-	 * Install the new, split up pagetable. Important detail here:
+	 * Install the new, split up pagetable. Important details here:
 	 *
 	 * On Intel the NX bit of all levels must be cleared to make a
 	 * page executable. See section 4.13.2 of Intel 64 and IA-32
 	 * Architectures Software Developer's Manual).
+	 *
+	 * Mark the entry present. The current mapping might be
+	 * set to not present, which we preserved above.
 	 */
 	ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
+	pgprot_val(ref_prot) |= _PAGE_PRESENT;
 	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 	base = NULL;
 
@@ -249,18 +419,12 @@ out_unlock:
 	return 0;
 }
 
-static int
-__change_page_attr(unsigned long address, unsigned long pfn,
-		   pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
+	int level, do_split, err;
 	struct page *kpte_page;
-	int level, err = 0;
 	pte_t *kpte;
 
-#ifdef CONFIG_X86_32
-	BUG_ON(pfn > max_low_pfn);
-#endif
-
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -271,23 +435,62 @@ repeat:
 	BUG_ON(PageCompound(kpte_page));
 
 	if (level == PG_LEVEL_4K) {
-		pgprot_t new_prot = pte_pgprot(*kpte);
 		pte_t new_pte, old_pte = *kpte;
+		pgprot_t new_prot = pte_pgprot(old_pte);
+
+		if(!pte_val(old_pte)) {
+			printk(KERN_WARNING "CPA: called for zero pte. "
+			       "vaddr = %lx cpa->vaddr = %lx\n", address,
+				cpa->vaddr);
+			WARN_ON(1);
+			return -EINVAL;
+		}
 
-		pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
-		pgprot_val(new_prot) |= pgprot_val(mask_set);
+		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
 
 		new_prot = static_protections(new_prot, address);
 
-		new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
-		BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
+		/*
+		 * We need to keep the pfn from the existing PTE,
+		 * after all we're only going to change it's attributes
+		 * not the memory it points to
+		 */
+		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+
+		/*
+		 * Do we really change anything ?
+		 */
+		if (pte_val(old_pte) != pte_val(new_pte)) {
+			set_pte_atomic(kpte, new_pte);
+			cpa->flushtlb = 1;
+		}
+		cpa->numpages = 1;
+		return 0;
+	}
+
+	/*
+	 * Check, whether we can keep the large page intact
+	 * and just change the pte:
+	 */
+	do_split = try_preserve_large_page(kpte, address, cpa);
+	/*
+	 * When the range fits into the existing large page,
+	 * return. cp->numpages and cpa->tlbflush have been updated in
+	 * try_large_page:
+	 */
+	if (do_split <= 0)
+		return do_split;
 
-		set_pte_atomic(kpte, new_pte);
-	} else {
-		err = split_large_page(kpte, address);
-		if (!err)
-			goto repeat;
+	/*
+	 * We have to split the large page:
+	 */
+	err = split_large_page(kpte, address);
+	if (!err) {
+		cpa->flushtlb = 1;
+		goto repeat;
 	}
+
 	return err;
 }
 
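The hunk above relies on a tri-state contract: try_preserve_large_page() returns a negative errno on failure, 0 when the large page could be preserved (or nothing changed), and 1 when the caller must split and retry the lookup. A compact userspace model of that control flow; every name here is an illustrative stand-in, not kernel code:

#include <stdio.h>

enum { PRESERVED = 0, MUST_SPLIT = 1 };

/* Pretend the first lookup finds a large page that cannot be kept: */
static int try_preserve_model(int attempt)
{
	return attempt == 0 ? MUST_SPLIT : PRESERVED;
}

static int change_page_attr_model(void)
{
	int attempt = 0;

	for (;;) {
		int do_split = try_preserve_model(attempt);

		/* <= 0 means error (<0) or preserved/unchanged (0): done. */
		if (do_split <= 0)
			return do_split;

		/* do_split == 1: split the large page, repeat the walk. */
		printf("split on attempt %d\n", attempt);
		attempt++;
	}
}

int main(void)
{
	printf("result: %d\n", change_page_attr_model()); /* prints 0 */
	return 0;
}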
@@ -304,19 +507,14 @@
  *
  * Modules and drivers should use the set_memory_* APIs instead.
  */
-
-#define HIGH_MAP_START		__START_KERNEL_map
-#define HIGH_MAP_END		(__START_KERNEL_map + KERNEL_TEXT_SIZE)
-
-static int
-change_page_attr_addr(unsigned long address, pgprot_t mask_set,
-		      pgprot_t mask_clr)
+static int change_page_attr_addr(struct cpa_data *cpa)
 {
-	unsigned long phys_addr = __pa(address);
-	unsigned long pfn = phys_addr >> PAGE_SHIFT;
 	int err;
+	unsigned long address = cpa->vaddr;
 
 #ifdef CONFIG_X86_64
+	unsigned long phys_addr = __pa(address);
+
 	/*
 	 * If we are inside the high mapped kernel range, then we
 	 * fixup the low mapping first. __va() returns the virtual
@@ -326,7 +524,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 	address = (unsigned long) __va(phys_addr);
 #endif
 
-	err = __change_page_attr(address, pfn, mask_set, mask_clr);
+	err = __change_page_attr(address, cpa);
 	if (err)
 		return err;
 
@@ -339,42 +537,89 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		/*
 		 * Calc the high mapping address. See __phys_addr()
 		 * for the non obvious details.
+		 *
+		 * Note that NX and other required permissions are
+		 * checked in static_protections().
 		 */
 		address = phys_addr + HIGH_MAP_START - phys_base;
-		/* Make sure the kernel mappings stay executable */
-		pgprot_val(mask_clr) |= _PAGE_NX;
 
 		/*
 		 * Our high aliases are imprecise, because we check
 		 * everything between 0 and KERNEL_TEXT_SIZE, so do
 		 * not propagate lookup failures back to users:
 		 */
-		__change_page_attr(address, pfn, mask_set, mask_clr);
+		__change_page_attr(address, cpa);
 	}
 #endif
 	return err;
 }
 
-static int __change_page_attr_set_clr(unsigned long addr, int numpages,
-				      pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr_set_clr(struct cpa_data *cpa)
 {
-	unsigned int i;
-	int ret;
+	int ret, numpages = cpa->numpages;
 
-	for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
-		ret = change_page_attr_addr(addr, mask_set, mask_clr);
+	while (numpages) {
+		/*
+		 * Store the remaining nr of pages for the large page
+		 * preservation check.
+		 */
+		cpa->numpages = numpages;
+		ret = change_page_attr_addr(cpa);
 		if (ret)
 			return ret;
-	}
 
+		/*
+		 * Adjust the number of pages with the result of the
+		 * CPA operation. Either a large page has been
+		 * preserved or a single page update happened.
+		 */
+		BUG_ON(cpa->numpages > numpages);
+		numpages -= cpa->numpages;
+		cpa->vaddr += cpa->numpages * PAGE_SIZE;
+	}
 	return 0;
 }
 
+static inline int cache_attr(pgprot_t attr)
+{
+	return pgprot_val(attr) &
+		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+}
+
 static int change_page_attr_set_clr(unsigned long addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr)
 {
-	int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
-					     mask_clr);
+	struct cpa_data cpa;
+	int ret, cache;
+
+	/*
+	 * Check, if we are requested to change a not supported
+	 * feature:
+	 */
+	mask_set = canon_pgprot(mask_set);
+	mask_clr = canon_pgprot(mask_clr);
+	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
+		return 0;
+
+	cpa.vaddr = addr;
+	cpa.numpages = numpages;
+	cpa.mask_set = mask_set;
+	cpa.mask_clr = mask_clr;
+	cpa.flushtlb = 0;
+
+	ret = __change_page_attr_set_clr(&cpa);
+
+	/*
+	 * Check whether we really changed something:
+	 */
+	if (!cpa.flushtlb)
+		return ret;
+
+	/*
+	 * No need to flush, when we did not set any of the caching
+	 * attributes:
+	 */
+	cache = cache_attr(mask_set);
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
@@ -383,9 +628,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	 * wbindv):
 	 */
 	if (!ret && cpu_has_clflush)
-		cpa_flush_range(addr, numpages);
+		cpa_flush_range(addr, numpages, cache);
 	else
-		cpa_flush_all();
+		cpa_flush_all(cache);
 
 	return ret;
 }
@@ -489,37 +734,26 @@ int set_pages_rw(struct page *page, int numpages)
 	return set_memory_rw(addr, numpages);
 }
 
-
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
-static inline int __change_page_attr_set(unsigned long addr, int numpages,
-					 pgprot_t mask)
-{
-	return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
-}
-
-static inline int __change_page_attr_clear(unsigned long addr, int numpages,
-					   pgprot_t mask)
-{
-	return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
-}
-#endif
-
 #ifdef CONFIG_DEBUG_PAGEALLOC
 
 static int __set_pages_p(struct page *page, int numpages)
 {
-	unsigned long addr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+				.numpages = numpages,
+				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.mask_clr = __pgprot(0)};
 
-	return __change_page_attr_set(addr, numpages,
-				      __pgprot(_PAGE_PRESENT | _PAGE_RW));
+	return __change_page_attr_set_clr(&cpa);
 }
 
static int __set_pages_np(struct page *page, int numpages)
 {
-	unsigned long addr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+				.numpages = numpages,
+				.mask_set = __pgprot(0),
+				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
 
-	return __change_page_attr_clear(addr, numpages,
-					__pgprot(_PAGE_PRESENT));
+	return __change_page_attr_set_clr(&cpa);
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
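
The page is truncated inside kernel_map_pages(). As a closing worked example, the alignment arithmetic from try_preserve_large_page() above, lifted into a standalone userspace sketch with hypothetical inputs (a 2M large page, PAGE_SHIFT of 12):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_PAGE_SIZE	(1UL << 21)		/* 2M */
#define PMD_PAGE_MASK	(~(PMD_PAGE_SIZE - 1))

int main(void)
{
	unsigned long address = 0x2100000;	/* 1M into a 2M page */
	unsigned long numpages_req = 1024;	/* caller asked for 4M */

	/* First 4k page of the *next* large page: */
	unsigned long nextpage_addr = (address + PMD_PAGE_SIZE) & PMD_PAGE_MASK;
	/* 4k pages left inside the current large page: */
	unsigned long numpages = (nextpage_addr - address) >> PAGE_SHIFT;

	if (numpages_req > numpages)
		numpages_req = numpages;	/* clamp to this large page */

	/* Keep the large page only if the request covers it exactly: */
	int keep = (address == nextpage_addr - PMD_PAGE_SIZE) &&
		   (numpages_req == (PMD_PAGE_SIZE >> PAGE_SHIFT));

	/* Prints "clamped to 256 pages, keep large page: 0" -> must split. */
	printf("clamped to %lu pages, keep large page: %d\n",
	       numpages_req, keep);
	return 0;
}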