path: root/mm/memory.c
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	99
1 file changed, 55 insertions, 44 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 0da414c383e7..c5e88bcd8ec3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -371,33 +371,37 @@ static inline int is_cow_mapping(unsigned int flags)
 }
 
 /*
- * This function gets the "struct page" associated with a pte or returns
- * NULL if no "struct page" is associated with the pte.
+ * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
- * A raw VM_PFNMAP mapping (ie. one that is not COWed) may not have any "struct
- * page" backing, and even if they do, they are not refcounted. COWed pages of
- * a VM_PFNMAP do always have a struct page, and they are normally refcounted
- * (they are _normal_ pages).
+ * "Special" mappings do not wish to be associated with a "struct page" (either
+ * it doesn't exist, or it exists but they don't want to touch it). In this
+ * case, NULL is returned here. "Normal" mappings do have a struct page.
  *
- * So a raw PFNMAP mapping will have each page table entry just pointing
- * to a page frame number, and as far as the VM layer is concerned, those do
- * not have pages associated with them - even if the PFN might point to memory
- * that otherwise is perfectly fine and has a "struct page".
+ * There are 2 broad cases. Firstly, an architecture may define a pte_special()
+ * pte bit, in which case this function is trivial. Secondly, an architecture
+ * may not have a spare pte bit, which requires a more complicated scheme,
+ * described below.
+ *
+ * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a
+ * special mapping (even if there are underlying and valid "struct pages").
+ * COWed pages of a VM_PFNMAP are always normal.
  *
  * The way we recognize COWed pages within VM_PFNMAP mappings is through the
  * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit
- * set, and the vm_pgoff will point to the first PFN mapped: thus every
- * page that is a raw mapping will always honor the rule
+ * set, and the vm_pgoff will point to the first PFN mapped: thus every special
+ * mapping will always honor the rule
  *
  *	pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
  *
- * A call to vm_normal_page() will return NULL for such a page.
+ * And for normal mappings this is false.
+ *
+ * This restricts such mappings to be a linear translation from virtual address
+ * to pfn. To get around this restriction, we allow arbitrary mappings so long
+ * as the vma is not a COW mapping; in that case, we know that all ptes are
+ * special (because none can have been COWed).
  *
- * If the page doesn't follow the "remap_pfn_range()" rule in a VM_PFNMAP
- * then the page has been COW'ed. A COW'ed page _does_ have a "struct page"
- * associated with it even if it is in a VM_PFNMAP range. Calling
- * vm_normal_page() on such a page will therefore return the "struct page".
  *
+ * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP.
  *
  * VM_MIXEDMAP mappings can likewise contain memory with or without "struct
  * page" backing, however the difference is that _all_ pages with a struct
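The linearity rule above is what lets the slow path (no pte_special() bit) tell a raw, special pfn from a COWed page: only ptes whose pfn still sits at the offset remap_pfn_range() originally gave them are treated as special. A minimal standalone sketch of that test (the helper name is hypothetical and not part of this patch; the same comparison appears inside vm_normal_page() in a later hunk):

/* Sketch: does this pte's pfn sit where remap_pfn_range() originally put it? */
static inline int pfn_is_linear_mapping(struct vm_area_struct *vma,
					unsigned long addr, unsigned long pfn)
{
	unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;

	/* true for the raw (special) mapping, false for a COWed replacement page */
	return pfn == vma->vm_pgoff + off;
}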
@@ -407,16 +411,29 @@ static inline int is_cow_mapping(unsigned int flags)
  * advantage is that we don't have to follow the strict linearity rule of
  * PFNMAP mappings in order to support COWable mappings.
  *
- * A call to vm_normal_page() with a VM_MIXEDMAP mapping will return the
- * associated "struct page" or NULL for memory not backed by a "struct page".
- *
- *
- * All other mappings should have a valid struct page, which will be
- * returned by a call to vm_normal_page().
  */
-struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+# define HAVE_PTE_SPECIAL 1
+#else
+# define HAVE_PTE_SPECIAL 0
+#endif
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+				pte_t pte)
 {
-	unsigned long pfn = pte_pfn(pte);
+	unsigned long pfn;
+
+	if (HAVE_PTE_SPECIAL) {
+		if (likely(!pte_special(pte))) {
+			VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+			return pte_page(pte);
+		}
+		VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+		return NULL;
+	}
+
+	/* !HAVE_PTE_SPECIAL case follows: */
+
+	pfn = pte_pfn(pte);
 
 	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
 		if (vma->vm_flags & VM_MIXEDMAP) {
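The HAVE_PTE_SPECIAL fast path above relies on the architecture defining __HAVE_ARCH_PTE_SPECIAL together with pte_special() and pte_mkspecial(). A rough sketch of what such an implementation might look like, assuming a spare software pte bit called _PAGE_SPECIAL (the bit name is illustrative; the real definitions belong in the architecture's asm/pgtable.h):

#define __HAVE_ARCH_PTE_SPECIAL

/* Sketch only: report/mark a pte as special via a spare software bit. */
static inline int pte_special(pte_t pte)
{
	return pte_val(pte) & _PAGE_SPECIAL;
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_SPECIAL);
}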
@@ -424,7 +441,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 				return NULL;
 			goto out;
 		} else {
-			unsigned long off = (addr-vma->vm_start) >> PAGE_SHIFT;
+			unsigned long off;
+			off = (addr - vma->vm_start) >> PAGE_SHIFT;
 			if (pfn == vma->vm_pgoff + off)
 				return NULL;
 			if (!is_cow_mapping(vma->vm_flags))
@@ -432,25 +450,12 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 		}
 	}
 
-#ifdef CONFIG_DEBUG_VM
-	/*
-	 * Add some anal sanity checks for now. Eventually,
-	 * we should just do "return pfn_to_page(pfn)", but
-	 * in the meantime we check that we get a valid pfn,
-	 * and that the resulting page looks ok.
-	 */
-	if (unlikely(!pfn_valid(pfn))) {
-		print_bad_pte(vma, pte, addr);
-		return NULL;
-	}
-#endif
+	VM_BUG_ON(!pfn_valid(pfn));
 
 	/*
-	 * NOTE! We still have PageReserved() pages in the page
-	 * tables.
+	 * NOTE! We still have PageReserved() pages in the page tables.
 	 *
-	 * The PAGE_ZERO() pages and various VDSO mappings can
-	 * cause them to exist.
+	 * eg. VDSO mappings can cause them to exist.
 	 */
 out:
 	return pfn_to_page(pfn);
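The open-coded CONFIG_DEBUG_VM block becomes a single VM_BUG_ON(), which compiles to a real check only when CONFIG_DEBUG_VM is enabled, roughly along the lines of the usual definition in include/linux/mmdebug.h:

#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond) BUG_ON(cond)
#else
#define VM_BUG_ON(cond) do { } while (0)
#endif

Note the behavioural difference: a bad pfn no longer goes through print_bad_pte() and a NULL return; with CONFIG_DEBUG_VM it BUGs, and without it the pfn is simply trusted.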
@@ -1263,6 +1268,12 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 	pte_t *pte, entry;
 	spinlock_t *ptl;
 
+	/*
+	 * Technically, architectures with pte_special can avoid all these
+	 * restrictions (same for remap_pfn_range). However we would like
+	 * consistency in testing and feature parity among all, so we should
+	 * try to keep these invariants in place for everybody.
+	 */
 	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
 	BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
 						(VM_PFNMAP|VM_MIXEDMAP));
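The two BUG_ON()s encode the rule that a vma handed to vm_insert_pfn() must be marked with exactly one of VM_PFNMAP or VM_MIXEDMAP. A hedged sketch of the expected call pattern from a driver's mmap handler (the driver name and pfn source are hypothetical):

#include <linux/fs.h>
#include <linux/mm.h>

/* Sketch: map one device page into the caller, using the PFNMAP flavour. */
static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long pfn = mydrv_base_pfn(file);	/* hypothetical helper */

	vma->vm_flags |= VM_PFNMAP;	/* exactly one of VM_PFNMAP / VM_MIXEDMAP */

	/* Insert the first page only; real drivers loop or insert from ->fault. */
	return vm_insert_pfn(vma, vma->vm_start, pfn);
}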
@@ -1278,7 +1289,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 		goto out_unlock;
 
 	/* Ok, finally just insert the thing.. */
-	entry = pfn_pte(pfn, vma->vm_page_prot);
+	entry = pte_mkspecial(pfn_pte(pfn, vma->vm_page_prot));
 	set_pte_at(mm, addr, pte, entry);
 	update_mmu_cache(vma, addr, entry);
 
@@ -1309,7 +1320,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
-		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
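remap_pfn_range() now marks every pte it installs as special. On architectures that do not define __HAVE_ARCH_PTE_SPECIAL this has to compile away to nothing; a sketch of the kind of fallback such architectures are expected to provide (exact location and spelling are architecture-specific):

/* Sketch: without a spare pte bit, "special" is a no-op. */
static inline int pte_special(pte_t pte)
{
	return 0;		/* never reported as special */
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	return pte;		/* marking changes nothing */
}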