diff options
author | Konstantin Khlebnikov <khlebnikov@openvz.org> | 2012-10-08 19:28:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-09 03:22:16 -0400 |
commit | b3b9c2932c32e0692018ed5f12f3fd8c70eea8ce (patch) | |
tree | bca2431f0b0bd2d364f041f0344836cd39b1822c | |
parent | 5180da410db6369d1f95c9014da1c9bc33fb043e (diff) |
mm, x86, pat: rework linear pfn-mmap tracking
Replace the generic vma-flag VM_PFN_AT_MMAP with x86-only VM_PAT.
We can toss the mapping address from remap_pfn_range() into
track_pfn_vma_new(), and collect all PAT-related logic together in
arch/x86/.
This patch also restores the original frustration-free is_cow_mapping() check
in remap_pfn_range(), as it was before commit v2.6.28-rc8-88-g3c8bb73
("x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3").
The is_linear_pfn_mapping() checks can be removed from mm/huge_memory.c,
because they are already handled by VM_PFNMAP in the VM_NO_THP bit-mask.
[suresh.b.siddha@intel.com: Reset the VM_PAT flag as part of untrack_pfn_vma()]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/x86/mm/pat.c | 17 | ||||
-rw-r--r-- | include/asm-generic/pgtable.h | 6 | ||||
-rw-r--r-- | include/linux/mm.h | 20 | ||||
-rw-r--r-- | mm/huge_memory.c | 19 | ||||
-rw-r--r-- | mm/memory.c | 26 |
5 files changed, 30 insertions, 58 deletions
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 74a702674e86..0eb572eda406 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -677,7 +677,7 @@ int track_pfn_copy(struct vm_area_struct *vma) | |||
677 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 677 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
678 | pgprot_t pgprot; | 678 | pgprot_t pgprot; |
679 | 679 | ||
680 | if (is_linear_pfn_mapping(vma)) { | 680 | if (vma->vm_flags & VM_PAT) { |
681 | /* | 681 | /* |
682 | * reserve the whole chunk covered by vma. We need the | 682 | * reserve the whole chunk covered by vma. We need the |
683 | * starting address and protection from pte. | 683 | * starting address and protection from pte. |
@@ -699,14 +699,20 @@ int track_pfn_copy(struct vm_area_struct *vma) | |||
699 | * single reserve_pfn_range call. | 699 | * single reserve_pfn_range call. |
700 | */ | 700 | */ |
701 | int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, | 701 | int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, |
702 | unsigned long pfn, unsigned long size) | 702 | unsigned long pfn, unsigned long addr, unsigned long size) |
703 | { | 703 | { |
704 | resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; | 704 | resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; |
705 | unsigned long flags; | 705 | unsigned long flags; |
706 | 706 | ||
707 | /* reserve the whole chunk starting from paddr */ | 707 | /* reserve the whole chunk starting from paddr */ |
708 | if (is_linear_pfn_mapping(vma)) | 708 | if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) { |
709 | return reserve_pfn_range(paddr, size, prot, 0); | 709 | int ret; |
710 | |||
711 | ret = reserve_pfn_range(paddr, size, prot, 0); | ||
712 | if (!ret) | ||
713 | vma->vm_flags |= VM_PAT; | ||
714 | return ret; | ||
715 | } | ||
710 | 716 | ||
711 | if (!pat_enabled) | 717 | if (!pat_enabled) |
712 | return 0; | 718 | return 0; |
@@ -758,7 +764,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, | |||
758 | resource_size_t paddr; | 764 | resource_size_t paddr; |
759 | unsigned long prot; | 765 | unsigned long prot; |
760 | 766 | ||
761 | if (!is_linear_pfn_mapping(vma)) | 767 | if (!(vma->vm_flags & VM_PAT)) |
762 | return; | 768 | return; |
763 | 769 | ||
764 | /* free the chunk starting from pfn or the whole chunk */ | 770 | /* free the chunk starting from pfn or the whole chunk */ |
@@ -772,6 +778,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, | |||
772 | size = vma->vm_end - vma->vm_start; | 778 | size = vma->vm_end - vma->vm_start; |
773 | } | 779 | } |
774 | free_pfn_range(paddr, size); | 780 | free_pfn_range(paddr, size); |
781 | vma->vm_flags &= ~VM_PAT; | ||
775 | } | 782 | } |
776 | 783 | ||
777 | pgprot_t pgprot_writecombine(pgprot_t prot) | 784 | pgprot_t pgprot_writecombine(pgprot_t prot) |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index d4d4592c97fc..c9a612069c8e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -391,7 +391,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, | |||
391 | * by remap_pfn_range() for physical range indicated by pfn and size. | 391 | * by remap_pfn_range() for physical range indicated by pfn and size. |
392 | */ | 392 | */ |
393 | static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, | 393 | static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, |
394 | unsigned long pfn, unsigned long size) | 394 | unsigned long pfn, unsigned long addr, |
395 | unsigned long size) | ||
395 | { | 396 | { |
396 | return 0; | 397 | return 0; |
397 | } | 398 | } |
@@ -426,7 +427,8 @@ static inline void untrack_pfn(struct vm_area_struct *vma, | |||
426 | } | 427 | } |
427 | #else | 428 | #else |
428 | extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, | 429 | extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, |
429 | unsigned long pfn, unsigned long size); | 430 | unsigned long pfn, unsigned long addr, |
431 | unsigned long size); | ||
430 | extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, | 432 | extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, |
431 | unsigned long pfn); | 433 | unsigned long pfn); |
432 | extern int track_pfn_copy(struct vm_area_struct *vma); | 434 | extern int track_pfn_copy(struct vm_area_struct *vma); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 311be906b57d..75d1632d3477 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -117,7 +117,7 @@ extern unsigned int kobjsize(const void *objp); | |||
117 | #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ | 117 | #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ |
118 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ | 118 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ |
119 | #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ | 119 | #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ |
120 | #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ | 120 | #define VM_PAT 0x40000000 /* PAT reserves whole VMA at once (x86) */ |
121 | #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ | 121 | #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ |
122 | 122 | ||
123 | /* Bits set in the VMA until the stack is in its final location */ | 123 | /* Bits set in the VMA until the stack is in its final location */ |
@@ -159,24 +159,6 @@ extern pgprot_t protection_map[16]; | |||
159 | #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ | 159 | #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ |
160 | 160 | ||
161 | /* | 161 | /* |
162 | * This interface is used by x86 PAT code to identify a pfn mapping that is | ||
163 | * linear over entire vma. This is to optimize PAT code that deals with | ||
164 | * marking the physical region with a particular prot. This is not for generic | ||
165 | * mm use. Note also that this check will not work if the pfn mapping is | ||
166 | * linear for a vma starting at physical address 0. In which case PAT code | ||
167 | * falls back to slow path of reserving physical range page by page. | ||
168 | */ | ||
169 | static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) | ||
170 | { | ||
171 | return !!(vma->vm_flags & VM_PFN_AT_MMAP); | ||
172 | } | ||
173 | |||
174 | static inline int is_pfn_mapping(struct vm_area_struct *vma) | ||
175 | { | ||
176 | return !!(vma->vm_flags & VM_PFNMAP); | ||
177 | } | ||
178 | |||
179 | /* | ||
180 | * vm_fault is filled by the the pagefault handler and passed to the vma's | 162 | * vm_fault is filled by the the pagefault handler and passed to the vma's |
181 | * ->fault function. The vma's ->fault is responsible for returning a bitmask | 163 | * ->fault function. The vma's ->fault is responsible for returning a bitmask |
182 | * of VM_FAULT_xxx flags that give details about how the fault was handled. | 164 | * of VM_FAULT_xxx flags that give details about how the fault was handled. |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 141dbb695097..73cb22ee9665 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1655,11 +1655,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) | |||
1655 | if (vma->vm_ops) | 1655 | if (vma->vm_ops) |
1656 | /* khugepaged not yet working on file or special mappings */ | 1656 | /* khugepaged not yet working on file or special mappings */ |
1657 | return 0; | 1657 | return 0; |
1658 | /* | 1658 | VM_BUG_ON(vma->vm_flags & VM_NO_THP); |
1659 | * If is_pfn_mapping() is true is_learn_pfn_mapping() must be | ||
1660 | * true too, verify it here. | ||
1661 | */ | ||
1662 | VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | ||
1663 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 1659 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
1664 | hend = vma->vm_end & HPAGE_PMD_MASK; | 1660 | hend = vma->vm_end & HPAGE_PMD_MASK; |
1665 | if (hstart < hend) | 1661 | if (hstart < hend) |
@@ -1912,11 +1908,7 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1912 | goto out; | 1908 | goto out; |
1913 | if (is_vma_temporary_stack(vma)) | 1909 | if (is_vma_temporary_stack(vma)) |
1914 | goto out; | 1910 | goto out; |
1915 | /* | 1911 | VM_BUG_ON(vma->vm_flags & VM_NO_THP); |
1916 | * If is_pfn_mapping() is true is_learn_pfn_mapping() must be | ||
1917 | * true too, verify it here. | ||
1918 | */ | ||
1919 | VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | ||
1920 | 1912 | ||
1921 | pgd = pgd_offset(mm, address); | 1913 | pgd = pgd_offset(mm, address); |
1922 | if (!pgd_present(*pgd)) | 1914 | if (!pgd_present(*pgd)) |
@@ -2154,12 +2146,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, | |||
2154 | goto skip; | 2146 | goto skip; |
2155 | if (is_vma_temporary_stack(vma)) | 2147 | if (is_vma_temporary_stack(vma)) |
2156 | goto skip; | 2148 | goto skip; |
2157 | /* | 2149 | VM_BUG_ON(vma->vm_flags & VM_NO_THP); |
2158 | * If is_pfn_mapping() is true is_learn_pfn_mapping() | ||
2159 | * must be true too, verify it here. | ||
2160 | */ | ||
2161 | VM_BUG_ON(is_linear_pfn_mapping(vma) || | ||
2162 | vma->vm_flags & VM_NO_THP); | ||
2163 | 2150 | ||
2164 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 2151 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2165 | hend = vma->vm_end & HPAGE_PMD_MASK; | 2152 | hend = vma->vm_end & HPAGE_PMD_MASK; |
diff --git a/mm/memory.c b/mm/memory.c index 6bef278ad303..655e1429388a 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1055,7 +1055,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
1055 | if (is_vm_hugetlb_page(vma)) | 1055 | if (is_vm_hugetlb_page(vma)) |
1056 | return copy_hugetlb_page_range(dst_mm, src_mm, vma); | 1056 | return copy_hugetlb_page_range(dst_mm, src_mm, vma); |
1057 | 1057 | ||
1058 | if (unlikely(is_pfn_mapping(vma))) { | 1058 | if (unlikely(vma->vm_flags & VM_PFNMAP)) { |
1059 | /* | 1059 | /* |
1060 | * We do not free on error cases below as remove_vma | 1060 | * We do not free on error cases below as remove_vma |
1061 | * gets called on error from higher level routine | 1061 | * gets called on error from higher level routine |
@@ -1327,7 +1327,7 @@ static void unmap_single_vma(struct mmu_gather *tlb, | |||
1327 | if (vma->vm_file) | 1327 | if (vma->vm_file) |
1328 | uprobe_munmap(vma, start, end); | 1328 | uprobe_munmap(vma, start, end); |
1329 | 1329 | ||
1330 | if (unlikely(is_pfn_mapping(vma))) | 1330 | if (unlikely(vma->vm_flags & VM_PFNMAP)) |
1331 | untrack_pfn(vma, 0, 0); | 1331 | untrack_pfn(vma, 0, 0); |
1332 | 1332 | ||
1333 | if (start != end) { | 1333 | if (start != end) { |
@@ -2299,26 +2299,20 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, | |||
2299 | * There's a horrible special case to handle copy-on-write | 2299 | * There's a horrible special case to handle copy-on-write |
2300 | * behaviour that some programs depend on. We mark the "original" | 2300 | * behaviour that some programs depend on. We mark the "original" |
2301 | * un-COW'ed pages by matching them up with "vma->vm_pgoff". | 2301 | * un-COW'ed pages by matching them up with "vma->vm_pgoff". |
2302 | * See vm_normal_page() for details. | ||
2302 | */ | 2303 | */ |
2303 | if (addr == vma->vm_start && end == vma->vm_end) { | 2304 | if (is_cow_mapping(vma->vm_flags)) { |
2305 | if (addr != vma->vm_start || end != vma->vm_end) | ||
2306 | return -EINVAL; | ||
2304 | vma->vm_pgoff = pfn; | 2307 | vma->vm_pgoff = pfn; |
2305 | vma->vm_flags |= VM_PFN_AT_MMAP; | 2308 | } |
2306 | } else if (is_cow_mapping(vma->vm_flags)) | 2309 | |
2310 | err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size)); | ||
2311 | if (err) | ||
2307 | return -EINVAL; | 2312 | return -EINVAL; |
2308 | 2313 | ||
2309 | vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; | 2314 | vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; |
2310 | 2315 | ||
2311 | err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size)); | ||
2312 | if (err) { | ||
2313 | /* | ||
2314 | * To indicate that track_pfn related cleanup is not | ||
2315 | * needed from higher level routine calling unmap_vmas | ||
2316 | */ | ||
2317 | vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); | ||
2318 | vma->vm_flags &= ~VM_PFN_AT_MMAP; | ||
2319 | return -EINVAL; | ||
2320 | } | ||
2321 | |||
2322 | BUG_ON(addr >= end); | 2316 | BUG_ON(addr >= end); |
2323 | pfn -= addr >> PAGE_SHIFT; | 2317 | pfn -= addr >> PAGE_SHIFT; |
2324 | pgd = pgd_offset(mm, addr); | 2318 | pgd = pgd_offset(mm, addr); |