Diffstat (limited to 'mm/memory.c')

 -rw-r--r--	mm/memory.c	99
 1 file changed, 55 insertions(+), 44 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 0da414c383e7..c5e88bcd8ec3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -371,33 +371,37 @@ static inline int is_cow_mapping(unsigned int flags)
 }
 
 /*
- * This function gets the "struct page" associated with a pte or returns
- * NULL if no "struct page" is associated with the pte.
+ * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
- * A raw VM_PFNMAP mapping (ie. one that is not COWed) may not have any "struct
- * page" backing, and even if they do, they are not refcounted. COWed pages of
- * a VM_PFNMAP do always have a struct page, and they are normally refcounted
- * (they are _normal_ pages).
+ * "Special" mappings do not wish to be associated with a "struct page" (either
+ * it doesn't exist, or it exists but they don't want to touch it). In this
+ * case, NULL is returned here. "Normal" mappings do have a struct page.
  *
- * So a raw PFNMAP mapping will have each page table entry just pointing
- * to a page frame number, and as far as the VM layer is concerned, those do
- * not have pages associated with them - even if the PFN might point to memory
- * that otherwise is perfectly fine and has a "struct page".
+ * There are 2 broad cases. Firstly, an architecture may define a pte_special()
+ * pte bit, in which case this function is trivial. Secondly, an architecture
+ * may not have a spare pte bit, which requires a more complicated scheme,
+ * described below.
+ *
+ * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a
+ * special mapping (even if there are underlying and valid "struct pages").
+ * COWed pages of a VM_PFNMAP are always normal.
  *
  * The way we recognize COWed pages within VM_PFNMAP mappings is through the
  * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit
- * set, and the vm_pgoff will point to the first PFN mapped: thus every
- * page that is a raw mapping will always honor the rule
+ * set, and the vm_pgoff will point to the first PFN mapped: thus every special
+ * mapping will always honor the rule
  *
  *	pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
  *
- * A call to vm_normal_page() will return NULL for such a page.
+ * And for normal mappings this is false.
+ *
+ * This restricts such mappings to be a linear translation from virtual address
+ * to pfn. To get around this restriction, we allow arbitrary mappings so long
+ * as the vma is not a COW mapping; in that case, we know that all ptes are
+ * special (because none can have been COWed).
  *
- * If the page doesn't follow the "remap_pfn_range()" rule in a VM_PFNMAP
- * then the page has been COW'ed. A COW'ed page _does_ have a "struct page"
- * associated with it even if it is in a VM_PFNMAP range. Calling
- * vm_normal_page() on such a page will therefore return the "struct page".
  *
+ * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP.
  *
  * VM_MIXEDMAP mappings can likewise contain memory with or without "struct
  * page" backing, however the difference is that _all_ pages with a struct
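The linearity rule quoted above is easier to see as code. A minimal sketch, assuming only the kernel definitions visible in this diff (the helper name pfnmap_pte_is_cowed is invented for illustration):

/*
 * Illustrative only, not part of the patch: recognizing a COWed
 * (normal) page inside a VM_PFNMAP vma without a pte_special() bit.
 */
static bool pfnmap_pte_is_cowed(struct vm_area_struct *vma,
				unsigned long addr, pte_t pte)
{
	unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;

	/*
	 * A pte that still obeys the linear rule was installed by
	 * remap_pfn_range() and is special; one that no longer does
	 * has been replaced by an anonymous COW copy and is normal.
	 */
	return pte_pfn(pte) != vma->vm_pgoff + off;
}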
@@ -407,16 +411,29 @@ static inline int is_cow_mapping(unsigned int flags)
  * advantage is that we don't have to follow the strict linearity rule of
  * PFNMAP mappings in order to support COWable mappings.
  *
- * A call to vm_normal_page() with a VM_MIXEDMAP mapping will return the
- * associated "struct page" or NULL for memory not backed by a "struct page".
- *
- *
- * All other mappings should have a valid struct page, which will be
- * returned by a call to vm_normal_page().
 */
-struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+# define HAVE_PTE_SPECIAL 1
+#else
+# define HAVE_PTE_SPECIAL 0
+#endif
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+				pte_t pte)
 {
-	unsigned long pfn = pte_pfn(pte);
+	unsigned long pfn;
+
+	if (HAVE_PTE_SPECIAL) {
+		if (likely(!pte_special(pte))) {
+			VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+			return pte_page(pte);
+		}
+		VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+		return NULL;
+	}
+
+	/* !HAVE_PTE_SPECIAL case follows: */
+
+	pfn = pte_pfn(pte);
 
 	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
 		if (vma->vm_flags & VM_MIXEDMAP) {
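The HAVE_PTE_SPECIAL fast path above depends on the architecture providing pte_special() and pte_mkspecial(). A hypothetical sketch of that per-architecture side, assuming one spare software bit in the pte (ARCH_PTE_SPECIAL_BIT is a made-up name; a real architecture picks a free bit in its own pte layout):

/* In the architecture's pgtable.h (sketch, names invented): */
#define __HAVE_ARCH_PTE_SPECIAL

static inline int pte_special(pte_t pte)
{
	/* Test the software "special" bit set by pte_mkspecial(). */
	return pte_val(pte) & ARCH_PTE_SPECIAL_BIT;
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	/*
	 * Mark the pte so vm_normal_page() can return NULL without
	 * consulting the vma at all.
	 */
	return __pte(pte_val(pte) | ARCH_PTE_SPECIAL_BIT);
}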
@@ -424,7 +441,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 				return NULL;
 			goto out;
 		} else {
-			unsigned long off = (addr-vma->vm_start) >> PAGE_SHIFT;
+			unsigned long off;
+			off = (addr - vma->vm_start) >> PAGE_SHIFT;
 			if (pfn == vma->vm_pgoff + off)
 				return NULL;
 			if (!is_cow_mapping(vma->vm_flags))
@@ -432,25 +450,12 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 		}
 	}
 
-#ifdef CONFIG_DEBUG_VM
-	/*
-	 * Add some anal sanity checks for now. Eventually,
-	 * we should just do "return pfn_to_page(pfn)", but
-	 * in the meantime we check that we get a valid pfn,
-	 * and that the resulting page looks ok.
-	 */
-	if (unlikely(!pfn_valid(pfn))) {
-		print_bad_pte(vma, pte, addr);
-		return NULL;
-	}
-#endif
+	VM_BUG_ON(!pfn_valid(pfn));
 
 	/*
-	 * NOTE! We still have PageReserved() pages in the page
-	 * tables.
+	 * NOTE! We still have PageReserved() pages in the page tables.
 	 *
-	 * The PAGE_ZERO() pages and various VDSO mappings can
-	 * cause them to exist.
+	 * eg. VDSO mappings can cause them to exist.
 	 */
 out:
 	return pfn_to_page(pfn);
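Read together, the !HAVE_PTE_SPECIAL hunks above amount to one classification decision. A condensed restatement as a standalone helper, for reading convenience only (pte_is_special_sw is not a real kernel function):

static bool pte_is_special_sw(struct vm_area_struct *vma,
			      unsigned long addr, unsigned long pfn)
{
	if (vma->vm_flags & VM_MIXEDMAP)
		return !pfn_valid(pfn);	/* normal iff a struct page exists */

	if (vma->vm_flags & VM_PFNMAP) {
		unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;

		if (!is_cow_mapping(vma->vm_flags))
			return true;	/* no COW possible: all ptes special */
		return pfn == vma->vm_pgoff + off;	/* linear => special */
	}

	return false;	/* ordinary mappings are always normal */
}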
@@ -1263,6 +1268,12 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 	pte_t *pte, entry;
 	spinlock_t *ptl;
 
+	/*
+	 * Technically, architectures with pte_special can avoid all these
+	 * restrictions (same for remap_pfn_range). However we would like
+	 * consistency in testing and feature parity among all, so we should
+	 * try to keep these invariants in place for everybody.
+	 */
 	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
 	BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
 						(VM_PFNMAP|VM_MIXEDMAP));
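A minimal sketch of a driver that satisfies these invariants, assuming the fault() signature of this kernel era; my_dev_pfn(), my_dev_fault() and my_dev_vm_ops are made-up names:

/*
 * Sketch only: a VM_PFNMAP (not VM_MIXEDMAP) shared mapping that
 * installs pfns at fault time via vm_insert_pfn().
 */
static int my_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long addr = (unsigned long)vmf->virtual_address;

	/* my_dev_pfn() is a hypothetical pgoff -> pfn lookup. */
	if (vm_insert_pfn(vma, addr, my_dev_pfn(vma, vmf->pgoff)))
		return VM_FAULT_SIGBUS;
	return VM_FAULT_NOPAGE;
}

static struct vm_operations_struct my_dev_vm_ops = {
	.fault	= my_dev_fault,
};

static int my_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* VM_PFNMAP without VM_MIXEDMAP, per the BUG_ON()s above. */
	vma->vm_flags |= VM_PFNMAP | VM_RESERVED;
	vma->vm_ops = &my_dev_vm_ops;
	return 0;
}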
@@ -1278,7 +1289,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 		goto out_unlock;
 
 	/* Ok, finally just insert the thing.. */
-	entry = pfn_pte(pfn, vma->vm_page_prot);
+	entry = pte_mkspecial(pfn_pte(pfn, vma->vm_page_prot));
 	set_pte_at(mm, addr, pte, entry);
 	update_mmu_cache(vma, addr, entry);
 
@@ -1309,7 +1320,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
-		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
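And for comparison, a sketch of the classic remap_pfn_range() caller whose ptes this hunk now marks special; MY_DEV_PHYS and my_lin_dev_mmap are made-up names:

/*
 * Sketch only: an mmap handler that maps device memory linearly.
 * remap_pfn_range() sets VM_PFNMAP and arranges vm_pgoff so the
 * linearity rule described earlier holds for every mapped page.
 */
static int my_lin_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	return remap_pfn_range(vma, vma->vm_start,
			       (MY_DEV_PHYS >> PAGE_SHIFT) + vma->vm_pgoff,
			       size, vma->vm_page_prot);
}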