 arch/i386/mm/hugetlbpage.c    | 112 ++++++++++++++++++++++++++++++++++++++++-
 arch/ia64/mm/hugetlbpage.c    |   5 +
 arch/powerpc/mm/hugetlbpage.c |   5 +
 arch/sh/mm/hugetlbpage.c      |   5 +
 arch/sh64/mm/hugetlbpage.c    |   5 +
 arch/sparc64/mm/hugetlbpage.c |   5 +
 include/linux/hugetlb.h       |   1 +
 mm/hugetlb.c                  |   7 +++
 8 files changed, 144 insertions(+), 1 deletion(-)
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 1719a8141f81..34728e4afe48 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -17,6 +17,113 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+                                struct vm_area_struct *vma,
+                                unsigned long addr, pgoff_t idx)
+{
+        unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+                                svma->vm_start;
+        unsigned long sbase = saddr & PUD_MASK;
+        unsigned long s_end = sbase + PUD_SIZE;
+
+        /*
+         * match the virtual addresses, permission and the alignment of the
+         * page table page.
+         */
+        if (pmd_index(addr) != pmd_index(saddr) ||
+            vma->vm_flags != svma->vm_flags ||
+            sbase < svma->vm_start || svma->vm_end < s_end)
+                return 0;
+
+        return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+        unsigned long base = addr & PUD_MASK;
+        unsigned long end = base + PUD_SIZE;
+
+        /*
+         * check on proper vm_flags and page table alignment
+         */
+        if (vma->vm_flags & VM_MAYSHARE &&
+            vma->vm_start <= base && end <= vma->vm_end)
+                return 1;
+        return 0;
+}
+
+/*
+ * search for a shareable pmd page for hugetlb.
+ */
+static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+        struct vm_area_struct *vma = find_vma(mm, addr);
+        struct address_space *mapping = vma->vm_file->f_mapping;
+        pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+                        vma->vm_pgoff;
+        struct prio_tree_iter iter;
+        struct vm_area_struct *svma;
+        unsigned long saddr;
+        pte_t *spte = NULL;
+
+        if (!vma_shareable(vma, addr))
+                return;
+
+        spin_lock(&mapping->i_mmap_lock);
+        vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+                if (svma == vma)
+                        continue;
+
+                saddr = page_table_shareable(svma, vma, addr, idx);
+                if (saddr) {
+                        spte = huge_pte_offset(svma->vm_mm, saddr);
+                        if (spte) {
+                                get_page(virt_to_page(spte));
+                                break;
+                        }
+                }
+        }
+
+        if (!spte)
+                goto out;
+
+        spin_lock(&mm->page_table_lock);
+        if (pud_none(*pud))
+                pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
+        else
+                put_page(virt_to_page(spte));
+        spin_unlock(&mm->page_table_lock);
+out:
+        spin_unlock(&mapping->i_mmap_lock);
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count.  If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *          0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        pgd_t *pgd = pgd_offset(mm, *addr);
+        pud_t *pud = pud_offset(pgd, *addr);
+
+        BUG_ON(page_count(virt_to_page(ptep)) == 0);
+        if (page_count(virt_to_page(ptep)) == 1)
+                return 0;
+
+        pud_clear(pud);
+        put_page(virt_to_page(ptep));
+        *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+        return 1;
+}
+
 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -25,8 +25,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
-	if (pud)
+	if (pud) {
+		if (pud_none(*pud))
+			huge_pmd_share(mm, addr, pud);
 		pte = (pte_t *) pmd_alloc(mm, pud, addr);
+	}
 	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
 	return pte;
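
The sharing test above is mostly address arithmetic, which is easier to follow outside the kernel. Below is a minimal userspace sketch of the page_table_shareable() check, assuming i386 PAE constants (2 MiB huge pages, one pmd page covering a 1 GiB pud range); struct vma, the VM_MAYSHARE value, and the helper names are illustrative stand-ins for the kernel's vm_area_struct and macros, not real kernel API.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PMD_SHIFT       21                      /* 2 MiB huge pages (PAE) */
#define PTRS_PER_PMD    512
#define PUD_SIZE        (1UL << 30)             /* range covered by one pmd page */
#define PUD_MASK        (~(PUD_SIZE - 1))
#define VM_MAYSHARE     0x1UL                   /* illustrative flag value */

struct vma {                                    /* stand-in for vm_area_struct */
        unsigned long vm_start, vm_end, vm_flags, vm_pgoff;
};

static unsigned long pmd_index(unsigned long addr)
{
        return (addr >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

/* mirrors page_table_shareable(): translate the faulting file index into
 * svma's address space, then require the same pmd slot, identical
 * permissions, and that svma spans the whole aligned pud range */
static unsigned long shareable(struct vma *svma, struct vma *vma,
                               unsigned long addr, unsigned long idx)
{
        unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
                                svma->vm_start;
        unsigned long sbase = saddr & PUD_MASK;
        unsigned long s_end = sbase + PUD_SIZE;

        if (pmd_index(addr) != pmd_index(saddr) ||
            vma->vm_flags != svma->vm_flags ||
            sbase < svma->vm_start || svma->vm_end < s_end)
                return 0;
        return saddr;
}

int main(void)
{
        /* two mms map the same file, at different 1 GiB-aligned addresses,
         * with identical file offsets */
        struct vma a = { 0x40000000UL, 0x80000000UL, VM_MAYSHARE, 0 };
        struct vma b = { 0x80000000UL, 0xc0000000UL, VM_MAYSHARE, 0 };
        unsigned long addr = 0x80000000UL;      /* fault address inside b */
        unsigned long idx = ((addr - b.vm_start) >> PAGE_SHIFT) + b.vm_pgoff;

        /* prints 0x40000000: b may reuse the pmd page a instantiated */
        printf("shareable at %#lx\n", shareable(&a, &b, addr, idx));
        return 0;
}

The pmd_index() comparison is what makes reuse safe: a pmd page can only be shared if the entries for the common file range occupy the same slots when the page is reached from either mapping.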
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index f3a9585e98a8..0c7e94edc20e 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -64,6 +64,11 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        return 0;
+}
+
 #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
 
 /*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 506d89768d45..424a8f57e155 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -146,6 +146,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	return hugepte_offset(hpdp, addr);
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        return 0;
+}
+
 static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 329059d6b54a..cf2c2ee35a37 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -63,6 +63,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        return 0;
+}
+
 struct page *follow_huge_addr(struct mm_struct *mm,
 			      unsigned long address, int write)
 {
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index 187cf01750b8..4b455f611146 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -53,6 +53,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        return 0;
+}
+
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 53b9b1f528e5..33fd0b265e70 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -235,6 +235,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+        return 0;
+}
+
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ace64e57e17f..a60995afe334 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -35,6 +35,7 @@ extern int sysctl_hugetlb_shm_group;
 
 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
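
A subtlety in the prototype above: huge_pmd_unshare() takes the address by pointer so that, on success, it can fast-forward the caller's loop past everything covered by the pmd page it just dropped (the callers in mm/hugetlb.c below advance by HPAGE_SIZE per iteration). Here is a small sketch of that rewind arithmetic, again assuming i386 PAE constants where HPAGE_SIZE * PTRS_PER_PTE is 1 GiB; this is illustrative, not kernel code.

#include <stdio.h>

#define HPAGE_SIZE      (1UL << 21)     /* 2 MiB huge page (PAE) */
#define PTRS_PER_PTE    512             /* huge pages per pmd page */
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        /* the caller iterates: for (; address < end; address += HPAGE_SIZE) */
        unsigned long address = 0x40200000UL;   /* inside a shared pud range */

        /* the rewind done by huge_pmd_unshare(): one huge page short of
         * the next 1 GiB boundary ... */
        address = ALIGN(address, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
        printf("rewound to      %#lx\n", address);      /* 0x7fe00000 */

        /* ... so the loop increment lands exactly on the boundary and the
         * remaining entries of the shared pmd page are never walked */
        address += HPAGE_SIZE;
        printf("next iteration  %#lx\n", address);      /* 0x80000000 */
        return 0;
}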
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f7355bf2f285..9244971b6791 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -386,6 +386,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 		if (!ptep)
 			continue;
 
+		if (huge_pmd_unshare(mm, &address, ptep))
+			continue;
+
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		if (pte_none(pte))
 			continue;
@@ -658,11 +661,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
 
+	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
 	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += HPAGE_SIZE) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
+		if (huge_pmd_unshare(mm, &address, ptep))
+			continue;
 		if (!pte_none(*ptep)) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
@@ -671,6 +677,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
 
 	flush_tlb_range(vma, start, end);
 }
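
For context, here is a hedged userspace sketch of the workload this patch targets: two processes with the same hugetlbfs file mapped MAP_SHARED over a pud-sized range. The mount point /mnt/huge, the file name, and the assumption that enough huge pages are reserved are all illustrative; whether the pmd page actually ends up shared also depends on the vma covering a naturally aligned 1 GiB range and on fault order, and is not directly observable from userspace.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define MAP_LEN         (1UL << 30)     /* one pud-sized range: 1 GiB */

int main(void)
{
        /* hypothetical hugetlbfs file; the mount point is an assumption */
        int fd = open("/mnt/huge/shared", O_CREAT | O_RDWR, 0600);
        if (fd < 0) { perror("open"); return 1; }
        if (ftruncate(fd, MAP_LEN) < 0) { perror("ftruncate"); return 1; }

        /* MAP_SHARED gives VM_MAYSHARE; the 1 GiB-aligned address hint
         * makes it possible for the vma to span a whole aligned pud range,
         * which vma_shareable() requires (best effort without MAP_FIXED) */
        char *p = mmap((void *)MAP_LEN, MAP_LEN, PROT_READ | PROT_WRITE,
                       MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        if (fork() == 0) {
                /* child touches the same file pages through its own mm;
                 * with this patch both mms can end up pointing their puds
                 * at one shared pmd page instead of two private copies */
                p[0] = 1;
                _exit(0);
        }
        p[0] = 1;
        wait(NULL);
        return 0;
}

The saving scales with the number of processes sharing the mapping: each of them reuses one pmd page per 1 GiB of huge-page mapping instead of instantiating its own, which was the motivating case (many processes attached to one large shared region).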