aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonstantin Khlebnikov <khlebnikov@openvz.org>2012-10-08 19:28:34 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 03:22:16 -0400
commitb3b9c2932c32e0692018ed5f12f3fd8c70eea8ce (patch)
treebca2431f0b0bd2d364f041f0344836cd39b1822c
parent5180da410db6369d1f95c9014da1c9bc33fb043e (diff)
mm, x86, pat: rework linear pfn-mmap tracking
Replace the generic vma-flag VM_PFN_AT_MMAP with the x86-only VM_PAT. We can toss the mapping address from remap_pfn_range() into track_pfn_vma_new(), and collect all PAT-related logic together in arch/x86/. This patch also restores the original frustration-free is_cow_mapping() check in remap_pfn_range(), as it was before commit v2.6.28-rc8-88-g3c8bb73 ("x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3"). The is_linear_pfn_mapping() checks can be removed from mm/huge_memory.c, because they are already handled by VM_PFNMAP in the VM_NO_THP bit-mask. [suresh.b.siddha@intel.com: Reset the VM_PAT flag as part of untrack_pfn_vma()] Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Venkatesh Pallipadi <venki@google.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Ingo Molnar <mingo@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Carsten Otte <cotte@de.ibm.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Eric Paris <eparis@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: James Morris <james.l.morris@oracle.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Kentaro Takeda <takedakn@nttdata.co.jp> Cc: Matt Helsley <matthltc@us.ibm.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Robert Richter <robert.richter@amd.com> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Cc: Venkatesh Pallipadi <venki@google.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/mm/pat.c17
-rw-r--r--include/asm-generic/pgtable.h6
-rw-r--r--include/linux/mm.h20
-rw-r--r--mm/huge_memory.c19
-rw-r--r--mm/memory.c26
5 files changed, 30 insertions, 58 deletions
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 74a702674e8..0eb572eda40 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -677,7 +677,7 @@ int track_pfn_copy(struct vm_area_struct *vma)
677 unsigned long vma_size = vma->vm_end - vma->vm_start; 677 unsigned long vma_size = vma->vm_end - vma->vm_start;
678 pgprot_t pgprot; 678 pgprot_t pgprot;
679 679
680 if (is_linear_pfn_mapping(vma)) { 680 if (vma->vm_flags & VM_PAT) {
681 /* 681 /*
682 * reserve the whole chunk covered by vma. We need the 682 * reserve the whole chunk covered by vma. We need the
683 * starting address and protection from pte. 683 * starting address and protection from pte.
@@ -699,14 +699,20 @@ int track_pfn_copy(struct vm_area_struct *vma)
699 * single reserve_pfn_range call. 699 * single reserve_pfn_range call.
700 */ 700 */
701int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, 701int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
702 unsigned long pfn, unsigned long size) 702 unsigned long pfn, unsigned long addr, unsigned long size)
703{ 703{
704 resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; 704 resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
705 unsigned long flags; 705 unsigned long flags;
706 706
707 /* reserve the whole chunk starting from paddr */ 707 /* reserve the whole chunk starting from paddr */
708 if (is_linear_pfn_mapping(vma)) 708 if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
709 return reserve_pfn_range(paddr, size, prot, 0); 709 int ret;
710
711 ret = reserve_pfn_range(paddr, size, prot, 0);
712 if (!ret)
713 vma->vm_flags |= VM_PAT;
714 return ret;
715 }
710 716
711 if (!pat_enabled) 717 if (!pat_enabled)
712 return 0; 718 return 0;
@@ -758,7 +764,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
758 resource_size_t paddr; 764 resource_size_t paddr;
759 unsigned long prot; 765 unsigned long prot;
760 766
761 if (!is_linear_pfn_mapping(vma)) 767 if (!(vma->vm_flags & VM_PAT))
762 return; 768 return;
763 769
764 /* free the chunk starting from pfn or the whole chunk */ 770 /* free the chunk starting from pfn or the whole chunk */
@@ -772,6 +778,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
772 size = vma->vm_end - vma->vm_start; 778 size = vma->vm_end - vma->vm_start;
773 } 779 }
774 free_pfn_range(paddr, size); 780 free_pfn_range(paddr, size);
781 vma->vm_flags &= ~VM_PAT;
775} 782}
776 783
777pgprot_t pgprot_writecombine(pgprot_t prot) 784pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index d4d4592c97f..c9a612069c8 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -391,7 +391,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
391 * by remap_pfn_range() for physical range indicated by pfn and size. 391 * by remap_pfn_range() for physical range indicated by pfn and size.
392 */ 392 */
393static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, 393static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
394 unsigned long pfn, unsigned long size) 394 unsigned long pfn, unsigned long addr,
395 unsigned long size)
395{ 396{
396 return 0; 397 return 0;
397} 398}
@@ -426,7 +427,8 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
426} 427}
427#else 428#else
428extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, 429extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
429 unsigned long pfn, unsigned long size); 430 unsigned long pfn, unsigned long addr,
431 unsigned long size);
430extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, 432extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
431 unsigned long pfn); 433 unsigned long pfn);
432extern int track_pfn_copy(struct vm_area_struct *vma); 434extern int track_pfn_copy(struct vm_area_struct *vma);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 311be906b57..75d1632d347 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -117,7 +117,7 @@ extern unsigned int kobjsize(const void *objp);
117#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ 117#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */
118#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ 118#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
119#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ 119#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
120#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ 120#define VM_PAT 0x40000000 /* PAT reserves whole VMA at once (x86) */
121#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ 121#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
122 122
123/* Bits set in the VMA until the stack is in its final location */ 123/* Bits set in the VMA until the stack is in its final location */
@@ -159,24 +159,6 @@ extern pgprot_t protection_map[16];
159#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ 159#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
160 160
161/* 161/*
162 * This interface is used by x86 PAT code to identify a pfn mapping that is
163 * linear over entire vma. This is to optimize PAT code that deals with
164 * marking the physical region with a particular prot. This is not for generic
165 * mm use. Note also that this check will not work if the pfn mapping is
166 * linear for a vma starting at physical address 0. In which case PAT code
167 * falls back to slow path of reserving physical range page by page.
168 */
169static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
170{
171 return !!(vma->vm_flags & VM_PFN_AT_MMAP);
172}
173
174static inline int is_pfn_mapping(struct vm_area_struct *vma)
175{
176 return !!(vma->vm_flags & VM_PFNMAP);
177}
178
179/*
180 * vm_fault is filled by the the pagefault handler and passed to the vma's 162 * vm_fault is filled by the the pagefault handler and passed to the vma's
181 * ->fault function. The vma's ->fault is responsible for returning a bitmask 163 * ->fault function. The vma's ->fault is responsible for returning a bitmask
182 * of VM_FAULT_xxx flags that give details about how the fault was handled. 164 * of VM_FAULT_xxx flags that give details about how the fault was handled.
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 141dbb69509..73cb22ee966 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1655,11 +1655,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
1655 if (vma->vm_ops) 1655 if (vma->vm_ops)
1656 /* khugepaged not yet working on file or special mappings */ 1656 /* khugepaged not yet working on file or special mappings */
1657 return 0; 1657 return 0;
1658 /* 1658 VM_BUG_ON(vma->vm_flags & VM_NO_THP);
1659 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
1660 * true too, verify it here.
1661 */
1662 VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
1663 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; 1659 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
1664 hend = vma->vm_end & HPAGE_PMD_MASK; 1660 hend = vma->vm_end & HPAGE_PMD_MASK;
1665 if (hstart < hend) 1661 if (hstart < hend)
@@ -1912,11 +1908,7 @@ static void collapse_huge_page(struct mm_struct *mm,
1912 goto out; 1908 goto out;
1913 if (is_vma_temporary_stack(vma)) 1909 if (is_vma_temporary_stack(vma))
1914 goto out; 1910 goto out;
1915 /* 1911 VM_BUG_ON(vma->vm_flags & VM_NO_THP);
1916 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
1917 * true too, verify it here.
1918 */
1919 VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
1920 1912
1921 pgd = pgd_offset(mm, address); 1913 pgd = pgd_offset(mm, address);
1922 if (!pgd_present(*pgd)) 1914 if (!pgd_present(*pgd))
@@ -2154,12 +2146,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
2154 goto skip; 2146 goto skip;
2155 if (is_vma_temporary_stack(vma)) 2147 if (is_vma_temporary_stack(vma))
2156 goto skip; 2148 goto skip;
2157 /* 2149 VM_BUG_ON(vma->vm_flags & VM_NO_THP);
2158 * If is_pfn_mapping() is true is_learn_pfn_mapping()
2159 * must be true too, verify it here.
2160 */
2161 VM_BUG_ON(is_linear_pfn_mapping(vma) ||
2162 vma->vm_flags & VM_NO_THP);
2163 2150
2164 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; 2151 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
2165 hend = vma->vm_end & HPAGE_PMD_MASK; 2152 hend = vma->vm_end & HPAGE_PMD_MASK;
diff --git a/mm/memory.c b/mm/memory.c
index 6bef278ad30..655e1429388 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1055,7 +1055,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
1055 if (is_vm_hugetlb_page(vma)) 1055 if (is_vm_hugetlb_page(vma))
1056 return copy_hugetlb_page_range(dst_mm, src_mm, vma); 1056 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
1057 1057
1058 if (unlikely(is_pfn_mapping(vma))) { 1058 if (unlikely(vma->vm_flags & VM_PFNMAP)) {
1059 /* 1059 /*
1060 * We do not free on error cases below as remove_vma 1060 * We do not free on error cases below as remove_vma
1061 * gets called on error from higher level routine 1061 * gets called on error from higher level routine
@@ -1327,7 +1327,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
1327 if (vma->vm_file) 1327 if (vma->vm_file)
1328 uprobe_munmap(vma, start, end); 1328 uprobe_munmap(vma, start, end);
1329 1329
1330 if (unlikely(is_pfn_mapping(vma))) 1330 if (unlikely(vma->vm_flags & VM_PFNMAP))
1331 untrack_pfn(vma, 0, 0); 1331 untrack_pfn(vma, 0, 0);
1332 1332
1333 if (start != end) { 1333 if (start != end) {
@@ -2299,26 +2299,20 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
2299 * There's a horrible special case to handle copy-on-write 2299 * There's a horrible special case to handle copy-on-write
2300 * behaviour that some programs depend on. We mark the "original" 2300 * behaviour that some programs depend on. We mark the "original"
2301 * un-COW'ed pages by matching them up with "vma->vm_pgoff". 2301 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
2302 * See vm_normal_page() for details.
2302 */ 2303 */
2303 if (addr == vma->vm_start && end == vma->vm_end) { 2304 if (is_cow_mapping(vma->vm_flags)) {
2305 if (addr != vma->vm_start || end != vma->vm_end)
2306 return -EINVAL;
2304 vma->vm_pgoff = pfn; 2307 vma->vm_pgoff = pfn;
2305 vma->vm_flags |= VM_PFN_AT_MMAP; 2308 }
2306 } else if (is_cow_mapping(vma->vm_flags)) 2309
2310 err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
2311 if (err)
2307 return -EINVAL; 2312 return -EINVAL;
2308 2313
2309 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; 2314 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
2310 2315
2311 err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size));
2312 if (err) {
2313 /*
2314 * To indicate that track_pfn related cleanup is not
2315 * needed from higher level routine calling unmap_vmas
2316 */
2317 vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
2318 vma->vm_flags &= ~VM_PFN_AT_MMAP;
2319 return -EINVAL;
2320 }
2321
2322 BUG_ON(addr >= end); 2316 BUG_ON(addr >= end);
2323 pfn -= addr >> PAGE_SHIFT; 2317 pfn -= addr >> PAGE_SHIFT;
2324 pgd = pgd_offset(mm, addr); 2318 pgd = pgd_offset(mm, addr);