diff options
author | David Gibson <david@gibson.dropbear.id.au> | 2005-06-21 20:14:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 21:46:15 -0400 |
commit | 63551ae0feaaa23807ebea60de1901564bbef32e (patch) | |
tree | f6f97f60f83c3e9813bdfcc6039c499997b1ea10 | |
parent | 1e7e5a9048b30c57ba1ddaa6cdf59b21b65cde99 (diff) |
[PATCH] Hugepage consolidation
A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch
attempts to consolidate a lot of the code across the archs, putting the
combined version in mm/hugetlb.c. There are a couple of uglyish hacks in
order to convert all the hugepage archs, but the result is a very large
reduction in the total amount of code. It also means things like hugepage
lazy allocation could be implemented in one place, instead of six.
Tested, at least a little, on ppc64, i386 and x86_64.
Notes:
- this patch changes the meaning of set_huge_pte() to be more
analogous to set_pte()
- does SH4 need a special huge_ptep_get_and_clear()??
Acked-by: William Lee Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/i386/mm/hugetlbpage.c | 170 | ||||
-rw-r--r-- | arch/ia64/mm/hugetlbpage.c | 158 | ||||
-rw-r--r-- | arch/ppc64/mm/hugetlbpage.c | 180 | ||||
-rw-r--r-- | arch/sh/mm/hugetlbpage.c | 196 | ||||
-rw-r--r-- | arch/sh64/mm/hugetlbpage.c | 18 | ||||
-rw-r--r-- | arch/sparc64/mm/hugetlbpage.c | 195 | ||||
-rw-r--r-- | include/asm-i386/page.h | 1 | ||||
-rw-r--r-- | include/asm-i386/pgtable.h | 2 | ||||
-rw-r--r-- | include/asm-ia64/pgtable.h | 1 | ||||
-rw-r--r-- | include/asm-sh/page.h | 1 | ||||
-rw-r--r-- | include/asm-sh/pgtable.h | 1 | ||||
-rw-r--r-- | include/asm-sh64/page.h | 1 | ||||
-rw-r--r-- | include/asm-sh64/pgtable.h | 2 | ||||
-rw-r--r-- | include/asm-sparc64/page.h | 2 | ||||
-rw-r--r-- | include/asm-sparc64/pgtable.h | 1 | ||||
-rw-r--r-- | include/asm-x86_64/page.h | 1 | ||||
-rw-r--r-- | include/asm-x86_64/pgtable.h | 3 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 40 | ||||
-rw-r--r-- | mm/hugetlb.c | 177 |
19 files changed, 300 insertions, 850 deletions
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 171fc925e1e4..5aa06001a4bd 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <asm/tlb.h> | 18 | #include <asm/tlb.h> |
19 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
20 | 20 | ||
21 | static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 21 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
22 | { | 22 | { |
23 | pgd_t *pgd; | 23 | pgd_t *pgd; |
24 | pud_t *pud; | 24 | pud_t *pud; |
@@ -30,7 +30,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
30 | return (pte_t *) pmd; | 30 | return (pte_t *) pmd; |
31 | } | 31 | } |
32 | 32 | ||
33 | static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 33 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
34 | { | 34 | { |
35 | pgd_t *pgd; | 35 | pgd_t *pgd; |
36 | pud_t *pud; | 36 | pud_t *pud; |
@@ -42,21 +42,6 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
42 | return (pte_t *) pmd; | 42 | return (pte_t *) pmd; |
43 | } | 43 | } |
44 | 44 | ||
45 | static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access) | ||
46 | { | ||
47 | pte_t entry; | ||
48 | |||
49 | add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); | ||
50 | if (write_access) { | ||
51 | entry = | ||
52 | pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); | ||
53 | } else | ||
54 | entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); | ||
55 | entry = pte_mkyoung(entry); | ||
56 | mk_pte_huge(entry); | ||
57 | set_pte(page_table, entry); | ||
58 | } | ||
59 | |||
60 | /* | 45 | /* |
61 | * This function checks for proper alignment of input addr and len parameters. | 46 | * This function checks for proper alignment of input addr and len parameters. |
62 | */ | 47 | */ |
@@ -69,77 +54,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |||
69 | return 0; | 54 | return 0; |
70 | } | 55 | } |
71 | 56 | ||
72 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | ||
73 | struct vm_area_struct *vma) | ||
74 | { | ||
75 | pte_t *src_pte, *dst_pte, entry; | ||
76 | struct page *ptepage; | ||
77 | unsigned long addr = vma->vm_start; | ||
78 | unsigned long end = vma->vm_end; | ||
79 | |||
80 | while (addr < end) { | ||
81 | dst_pte = huge_pte_alloc(dst, addr); | ||
82 | if (!dst_pte) | ||
83 | goto nomem; | ||
84 | src_pte = huge_pte_offset(src, addr); | ||
85 | entry = *src_pte; | ||
86 | ptepage = pte_page(entry); | ||
87 | get_page(ptepage); | ||
88 | set_pte(dst_pte, entry); | ||
89 | add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); | ||
90 | addr += HPAGE_SIZE; | ||
91 | } | ||
92 | return 0; | ||
93 | |||
94 | nomem: | ||
95 | return -ENOMEM; | ||
96 | } | ||
97 | |||
98 | int | ||
99 | follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
100 | struct page **pages, struct vm_area_struct **vmas, | ||
101 | unsigned long *position, int *length, int i) | ||
102 | { | ||
103 | unsigned long vpfn, vaddr = *position; | ||
104 | int remainder = *length; | ||
105 | |||
106 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
107 | |||
108 | vpfn = vaddr/PAGE_SIZE; | ||
109 | while (vaddr < vma->vm_end && remainder) { | ||
110 | |||
111 | if (pages) { | ||
112 | pte_t *pte; | ||
113 | struct page *page; | ||
114 | |||
115 | pte = huge_pte_offset(mm, vaddr); | ||
116 | |||
117 | /* hugetlb should be locked, and hence, prefaulted */ | ||
118 | WARN_ON(!pte || pte_none(*pte)); | ||
119 | |||
120 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | ||
121 | |||
122 | WARN_ON(!PageCompound(page)); | ||
123 | |||
124 | get_page(page); | ||
125 | pages[i] = page; | ||
126 | } | ||
127 | |||
128 | if (vmas) | ||
129 | vmas[i] = vma; | ||
130 | |||
131 | vaddr += PAGE_SIZE; | ||
132 | ++vpfn; | ||
133 | --remainder; | ||
134 | ++i; | ||
135 | } | ||
136 | |||
137 | *length = remainder; | ||
138 | *position = vaddr; | ||
139 | |||
140 | return i; | ||
141 | } | ||
142 | |||
143 | #if 0 /* This is just for testing */ | 57 | #if 0 /* This is just for testing */ |
144 | struct page * | 58 | struct page * |
145 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 59 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
@@ -204,83 +118,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
204 | } | 118 | } |
205 | #endif | 119 | #endif |
206 | 120 | ||
207 | void unmap_hugepage_range(struct vm_area_struct *vma, | 121 | void hugetlb_clean_stale_pgtable(pte_t *pte) |
208 | unsigned long start, unsigned long end) | ||
209 | { | 122 | { |
210 | struct mm_struct *mm = vma->vm_mm; | 123 | pmd_t *pmd = (pmd_t *) pte; |
211 | unsigned long address; | ||
212 | pte_t pte, *ptep; | ||
213 | struct page *page; | 124 | struct page *page; |
214 | 125 | ||
215 | BUG_ON(start & (HPAGE_SIZE - 1)); | 126 | page = pmd_page(*pmd); |
216 | BUG_ON(end & (HPAGE_SIZE - 1)); | 127 | pmd_clear(pmd); |
217 | 128 | dec_page_state(nr_page_table_pages); | |
218 | for (address = start; address < end; address += HPAGE_SIZE) { | 129 | page_cache_release(page); |
219 | ptep = huge_pte_offset(mm, address); | ||
220 | if (!ptep) | ||
221 | continue; | ||
222 | pte = ptep_get_and_clear(mm, address, ptep); | ||
223 | if (pte_none(pte)) | ||
224 | continue; | ||
225 | page = pte_page(pte); | ||
226 | put_page(page); | ||
227 | } | ||
228 | add_mm_counter(mm ,rss, -((end - start) >> PAGE_SHIFT)); | ||
229 | flush_tlb_range(vma, start, end); | ||
230 | } | ||
231 | |||
232 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | ||
233 | { | ||
234 | struct mm_struct *mm = current->mm; | ||
235 | unsigned long addr; | ||
236 | int ret = 0; | ||
237 | |||
238 | BUG_ON(vma->vm_start & ~HPAGE_MASK); | ||
239 | BUG_ON(vma->vm_end & ~HPAGE_MASK); | ||
240 | |||
241 | spin_lock(&mm->page_table_lock); | ||
242 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | ||
243 | unsigned long idx; | ||
244 | pte_t *pte = huge_pte_alloc(mm, addr); | ||
245 | struct page *page; | ||
246 | |||
247 | if (!pte) { | ||
248 | ret = -ENOMEM; | ||
249 | goto out; | ||
250 | } | ||
251 | |||
252 | if (!pte_none(*pte)) | ||
253 | continue; | ||
254 | |||
255 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
256 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
257 | page = find_get_page(mapping, idx); | ||
258 | if (!page) { | ||
259 | /* charge the fs quota first */ | ||
260 | if (hugetlb_get_quota(mapping)) { | ||
261 | ret = -ENOMEM; | ||
262 | goto out; | ||
263 | } | ||
264 | page = alloc_huge_page(); | ||
265 | if (!page) { | ||
266 | hugetlb_put_quota(mapping); | ||
267 | ret = -ENOMEM; | ||
268 | goto out; | ||
269 | } | ||
270 | ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); | ||
271 | if (! ret) { | ||
272 | unlock_page(page); | ||
273 | } else { | ||
274 | hugetlb_put_quota(mapping); | ||
275 | free_huge_page(page); | ||
276 | goto out; | ||
277 | } | ||
278 | } | ||
279 | set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); | ||
280 | } | ||
281 | out: | ||
282 | spin_unlock(&mm->page_table_lock); | ||
283 | return ret; | ||
284 | } | 130 | } |
285 | 131 | ||
286 | /* x86_64 also uses this file */ | 132 | /* x86_64 also uses this file */ |
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index df08ae7634b6..e0a776a3044c 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c | |||
@@ -24,7 +24,7 @@ | |||
24 | 24 | ||
25 | unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; | 25 | unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; |
26 | 26 | ||
27 | static pte_t * | 27 | pte_t * |
28 | huge_pte_alloc (struct mm_struct *mm, unsigned long addr) | 28 | huge_pte_alloc (struct mm_struct *mm, unsigned long addr) |
29 | { | 29 | { |
30 | unsigned long taddr = htlbpage_to_page(addr); | 30 | unsigned long taddr = htlbpage_to_page(addr); |
@@ -43,7 +43,7 @@ huge_pte_alloc (struct mm_struct *mm, unsigned long addr) | |||
43 | return pte; | 43 | return pte; |
44 | } | 44 | } |
45 | 45 | ||
46 | static pte_t * | 46 | pte_t * |
47 | huge_pte_offset (struct mm_struct *mm, unsigned long addr) | 47 | huge_pte_offset (struct mm_struct *mm, unsigned long addr) |
48 | { | 48 | { |
49 | unsigned long taddr = htlbpage_to_page(addr); | 49 | unsigned long taddr = htlbpage_to_page(addr); |
@@ -67,23 +67,6 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) | |||
67 | 67 | ||
68 | #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } | 68 | #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } |
69 | 69 | ||
70 | static void | ||
71 | set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma, | ||
72 | struct page *page, pte_t * page_table, int write_access) | ||
73 | { | ||
74 | pte_t entry; | ||
75 | |||
76 | add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); | ||
77 | if (write_access) { | ||
78 | entry = | ||
79 | pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); | ||
80 | } else | ||
81 | entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); | ||
82 | entry = pte_mkyoung(entry); | ||
83 | mk_pte_huge(entry); | ||
84 | set_pte(page_table, entry); | ||
85 | return; | ||
86 | } | ||
87 | /* | 70 | /* |
88 | * This function checks for proper alignment of input addr and len parameters. | 71 | * This function checks for proper alignment of input addr and len parameters. |
89 | */ | 72 | */ |
@@ -99,68 +82,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |||
99 | return 0; | 82 | return 0; |
100 | } | 83 | } |
101 | 84 | ||
102 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | ||
103 | struct vm_area_struct *vma) | ||
104 | { | ||
105 | pte_t *src_pte, *dst_pte, entry; | ||
106 | struct page *ptepage; | ||
107 | unsigned long addr = vma->vm_start; | ||
108 | unsigned long end = vma->vm_end; | ||
109 | |||
110 | while (addr < end) { | ||
111 | dst_pte = huge_pte_alloc(dst, addr); | ||
112 | if (!dst_pte) | ||
113 | goto nomem; | ||
114 | src_pte = huge_pte_offset(src, addr); | ||
115 | entry = *src_pte; | ||
116 | ptepage = pte_page(entry); | ||
117 | get_page(ptepage); | ||
118 | set_pte(dst_pte, entry); | ||
119 | add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); | ||
120 | addr += HPAGE_SIZE; | ||
121 | } | ||
122 | return 0; | ||
123 | nomem: | ||
124 | return -ENOMEM; | ||
125 | } | ||
126 | |||
127 | int | ||
128 | follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
129 | struct page **pages, struct vm_area_struct **vmas, | ||
130 | unsigned long *st, int *length, int i) | ||
131 | { | ||
132 | pte_t *ptep, pte; | ||
133 | unsigned long start = *st; | ||
134 | unsigned long pstart; | ||
135 | int len = *length; | ||
136 | struct page *page; | ||
137 | |||
138 | do { | ||
139 | pstart = start & HPAGE_MASK; | ||
140 | ptep = huge_pte_offset(mm, start); | ||
141 | pte = *ptep; | ||
142 | |||
143 | back1: | ||
144 | page = pte_page(pte); | ||
145 | if (pages) { | ||
146 | page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); | ||
147 | get_page(page); | ||
148 | pages[i] = page; | ||
149 | } | ||
150 | if (vmas) | ||
151 | vmas[i] = vma; | ||
152 | i++; | ||
153 | len--; | ||
154 | start += PAGE_SIZE; | ||
155 | if (((start & HPAGE_MASK) == pstart) && len && | ||
156 | (start < vma->vm_end)) | ||
157 | goto back1; | ||
158 | } while (len && start < vma->vm_end); | ||
159 | *length = len; | ||
160 | *st = start; | ||
161 | return i; | ||
162 | } | ||
163 | |||
164 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) | 85 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) |
165 | { | 86 | { |
166 | struct page *page; | 87 | struct page *page; |
@@ -212,81 +133,6 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, | |||
212 | free_pgd_range(tlb, addr, end, floor, ceiling); | 133 | free_pgd_range(tlb, addr, end, floor, ceiling); |
213 | } | 134 | } |
214 | 135 | ||
215 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) | ||
216 | { | ||
217 | struct mm_struct *mm = vma->vm_mm; | ||
218 | unsigned long address; | ||
219 | pte_t *pte; | ||
220 | struct page *page; | ||
221 | |||
222 | BUG_ON(start & (HPAGE_SIZE - 1)); | ||
223 | BUG_ON(end & (HPAGE_SIZE - 1)); | ||
224 | |||
225 | for (address = start; address < end; address += HPAGE_SIZE) { | ||
226 | pte = huge_pte_offset(mm, address); | ||
227 | if (pte_none(*pte)) | ||
228 | continue; | ||
229 | page = pte_page(*pte); | ||
230 | put_page(page); | ||
231 | pte_clear(mm, address, pte); | ||
232 | } | ||
233 | add_mm_counter(mm, rss, - ((end - start) >> PAGE_SHIFT)); | ||
234 | flush_tlb_range(vma, start, end); | ||
235 | } | ||
236 | |||
237 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | ||
238 | { | ||
239 | struct mm_struct *mm = current->mm; | ||
240 | unsigned long addr; | ||
241 | int ret = 0; | ||
242 | |||
243 | BUG_ON(vma->vm_start & ~HPAGE_MASK); | ||
244 | BUG_ON(vma->vm_end & ~HPAGE_MASK); | ||
245 | |||
246 | spin_lock(&mm->page_table_lock); | ||
247 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | ||
248 | unsigned long idx; | ||
249 | pte_t *pte = huge_pte_alloc(mm, addr); | ||
250 | struct page *page; | ||
251 | |||
252 | if (!pte) { | ||
253 | ret = -ENOMEM; | ||
254 | goto out; | ||
255 | } | ||
256 | if (!pte_none(*pte)) | ||
257 | continue; | ||
258 | |||
259 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
260 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
261 | page = find_get_page(mapping, idx); | ||
262 | if (!page) { | ||
263 | /* charge the fs quota first */ | ||
264 | if (hugetlb_get_quota(mapping)) { | ||
265 | ret = -ENOMEM; | ||
266 | goto out; | ||
267 | } | ||
268 | page = alloc_huge_page(); | ||
269 | if (!page) { | ||
270 | hugetlb_put_quota(mapping); | ||
271 | ret = -ENOMEM; | ||
272 | goto out; | ||
273 | } | ||
274 | ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); | ||
275 | if (! ret) { | ||
276 | unlock_page(page); | ||
277 | } else { | ||
278 | hugetlb_put_quota(mapping); | ||
279 | page_cache_release(page); | ||
280 | goto out; | ||
281 | } | ||
282 | } | ||
283 | set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); | ||
284 | } | ||
285 | out: | ||
286 | spin_unlock(&mm->page_table_lock); | ||
287 | return ret; | ||
288 | } | ||
289 | |||
290 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, | 136 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, |
291 | unsigned long pgoff, unsigned long flags) | 137 | unsigned long pgoff, unsigned long flags) |
292 | { | 138 | { |
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index d3bf86a5c1ad..b4ab766f5980 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c | |||
@@ -121,7 +121,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr | |||
121 | return hugepte_offset(dir, addr); | 121 | return hugepte_offset(dir, addr); |
122 | } | 122 | } |
123 | 123 | ||
124 | static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 124 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
125 | { | 125 | { |
126 | pud_t *pud; | 126 | pud_t *pud; |
127 | 127 | ||
@@ -134,7 +134,7 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
134 | return hugepte_offset(pud, addr); | 134 | return hugepte_offset(pud, addr); |
135 | } | 135 | } |
136 | 136 | ||
137 | static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 137 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
138 | { | 138 | { |
139 | pud_t *pud; | 139 | pud_t *pud; |
140 | 140 | ||
@@ -147,25 +147,6 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
147 | return hugepte_alloc(mm, pud, addr); | 147 | return hugepte_alloc(mm, pud, addr); |
148 | } | 148 | } |
149 | 149 | ||
150 | static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, | ||
151 | unsigned long addr, struct page *page, | ||
152 | pte_t *ptep, int write_access) | ||
153 | { | ||
154 | pte_t entry; | ||
155 | |||
156 | add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); | ||
157 | if (write_access) { | ||
158 | entry = | ||
159 | pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); | ||
160 | } else { | ||
161 | entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); | ||
162 | } | ||
163 | entry = pte_mkyoung(entry); | ||
164 | entry = pte_mkhuge(entry); | ||
165 | |||
166 | set_pte_at(mm, addr, ptep, entry); | ||
167 | } | ||
168 | |||
169 | /* | 150 | /* |
170 | * This function checks for proper alignment of input addr and len parameters. | 151 | * This function checks for proper alignment of input addr and len parameters. |
171 | */ | 152 | */ |
@@ -259,80 +240,6 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len) | |||
259 | return -EINVAL; | 240 | return -EINVAL; |
260 | } | 241 | } |
261 | 242 | ||
262 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | ||
263 | struct vm_area_struct *vma) | ||
264 | { | ||
265 | pte_t *src_pte, *dst_pte, entry; | ||
266 | struct page *ptepage; | ||
267 | unsigned long addr = vma->vm_start; | ||
268 | unsigned long end = vma->vm_end; | ||
269 | int err = -ENOMEM; | ||
270 | |||
271 | while (addr < end) { | ||
272 | dst_pte = huge_pte_alloc(dst, addr); | ||
273 | if (!dst_pte) | ||
274 | goto out; | ||
275 | |||
276 | src_pte = huge_pte_offset(src, addr); | ||
277 | entry = *src_pte; | ||
278 | |||
279 | ptepage = pte_page(entry); | ||
280 | get_page(ptepage); | ||
281 | add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); | ||
282 | set_pte_at(dst, addr, dst_pte, entry); | ||
283 | |||
284 | addr += HPAGE_SIZE; | ||
285 | } | ||
286 | |||
287 | err = 0; | ||
288 | out: | ||
289 | return err; | ||
290 | } | ||
291 | |||
292 | int | ||
293 | follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
294 | struct page **pages, struct vm_area_struct **vmas, | ||
295 | unsigned long *position, int *length, int i) | ||
296 | { | ||
297 | unsigned long vpfn, vaddr = *position; | ||
298 | int remainder = *length; | ||
299 | |||
300 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
301 | |||
302 | vpfn = vaddr/PAGE_SIZE; | ||
303 | while (vaddr < vma->vm_end && remainder) { | ||
304 | if (pages) { | ||
305 | pte_t *pte; | ||
306 | struct page *page; | ||
307 | |||
308 | pte = huge_pte_offset(mm, vaddr); | ||
309 | |||
310 | /* hugetlb should be locked, and hence, prefaulted */ | ||
311 | WARN_ON(!pte || pte_none(*pte)); | ||
312 | |||
313 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | ||
314 | |||
315 | WARN_ON(!PageCompound(page)); | ||
316 | |||
317 | get_page(page); | ||
318 | pages[i] = page; | ||
319 | } | ||
320 | |||
321 | if (vmas) | ||
322 | vmas[i] = vma; | ||
323 | |||
324 | vaddr += PAGE_SIZE; | ||
325 | ++vpfn; | ||
326 | --remainder; | ||
327 | ++i; | ||
328 | } | ||
329 | |||
330 | *length = remainder; | ||
331 | *position = vaddr; | ||
332 | |||
333 | return i; | ||
334 | } | ||
335 | |||
336 | struct page * | 243 | struct page * |
337 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 244 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
338 | { | 245 | { |
@@ -363,89 +270,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
363 | return NULL; | 270 | return NULL; |
364 | } | 271 | } |
365 | 272 | ||
366 | void unmap_hugepage_range(struct vm_area_struct *vma, | ||
367 | unsigned long start, unsigned long end) | ||
368 | { | ||
369 | struct mm_struct *mm = vma->vm_mm; | ||
370 | unsigned long addr; | ||
371 | pte_t *ptep; | ||
372 | struct page *page; | ||
373 | |||
374 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
375 | BUG_ON((start % HPAGE_SIZE) != 0); | ||
376 | BUG_ON((end % HPAGE_SIZE) != 0); | ||
377 | |||
378 | for (addr = start; addr < end; addr += HPAGE_SIZE) { | ||
379 | pte_t pte; | ||
380 | |||
381 | ptep = huge_pte_offset(mm, addr); | ||
382 | if (!ptep || pte_none(*ptep)) | ||
383 | continue; | ||
384 | |||
385 | pte = *ptep; | ||
386 | page = pte_page(pte); | ||
387 | pte_clear(mm, addr, ptep); | ||
388 | |||
389 | put_page(page); | ||
390 | } | ||
391 | add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); | ||
392 | flush_tlb_pending(); | ||
393 | } | ||
394 | |||
395 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | ||
396 | { | ||
397 | struct mm_struct *mm = current->mm; | ||
398 | unsigned long addr; | ||
399 | int ret = 0; | ||
400 | |||
401 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
402 | BUG_ON((vma->vm_start % HPAGE_SIZE) != 0); | ||
403 | BUG_ON((vma->vm_end % HPAGE_SIZE) != 0); | ||
404 | |||
405 | spin_lock(&mm->page_table_lock); | ||
406 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | ||
407 | unsigned long idx; | ||
408 | pte_t *pte = huge_pte_alloc(mm, addr); | ||
409 | struct page *page; | ||
410 | |||
411 | if (!pte) { | ||
412 | ret = -ENOMEM; | ||
413 | goto out; | ||
414 | } | ||
415 | if (! pte_none(*pte)) | ||
416 | continue; | ||
417 | |||
418 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
419 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
420 | page = find_get_page(mapping, idx); | ||
421 | if (!page) { | ||
422 | /* charge the fs quota first */ | ||
423 | if (hugetlb_get_quota(mapping)) { | ||
424 | ret = -ENOMEM; | ||
425 | goto out; | ||
426 | } | ||
427 | page = alloc_huge_page(); | ||
428 | if (!page) { | ||
429 | hugetlb_put_quota(mapping); | ||
430 | ret = -ENOMEM; | ||
431 | goto out; | ||
432 | } | ||
433 | ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); | ||
434 | if (! ret) { | ||
435 | unlock_page(page); | ||
436 | } else { | ||
437 | hugetlb_put_quota(mapping); | ||
438 | free_huge_page(page); | ||
439 | goto out; | ||
440 | } | ||
441 | } | ||
442 | set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE); | ||
443 | } | ||
444 | out: | ||
445 | spin_unlock(&mm->page_table_lock); | ||
446 | return ret; | ||
447 | } | ||
448 | |||
449 | /* Because we have an exclusive hugepage region which lies within the | 273 | /* Because we have an exclusive hugepage region which lies within the |
450 | * normal user address space, we have to take special measures to make | 274 | * normal user address space, we have to take special measures to make |
451 | * non-huge mmap()s evade the hugepage reserved regions. */ | 275 | * non-huge mmap()s evade the hugepage reserved regions. */ |
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 1f897bab2318..95bb1a6c6060 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <asm/tlbflush.h> | 24 | #include <asm/tlbflush.h> |
25 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
26 | 26 | ||
27 | static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 27 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
28 | { | 28 | { |
29 | pgd_t *pgd; | 29 | pgd_t *pgd; |
30 | pmd_t *pmd; | 30 | pmd_t *pmd; |
@@ -39,7 +39,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
39 | return pte; | 39 | return pte; |
40 | } | 40 | } |
41 | 41 | ||
42 | static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 42 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
43 | { | 43 | { |
44 | pgd_t *pgd; | 44 | pgd_t *pgd; |
45 | pmd_t *pmd; | 45 | pmd_t *pmd; |
@@ -56,28 +56,34 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
56 | 56 | ||
57 | #define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) | 57 | #define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) |
58 | 58 | ||
59 | static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, | 59 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
60 | struct page *page, pte_t * page_table, int write_access) | 60 | pte_t *ptep, pte_t entry) |
61 | { | 61 | { |
62 | unsigned long i; | 62 | int i; |
63 | pte_t entry; | ||
64 | 63 | ||
65 | add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); | 64 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { |
65 | set_pte_at(mm, addr, ptep, entry); | ||
66 | ptep++; | ||
67 | addr += PAGE_SIZE; | ||
68 | pte_val(entry) += PAGE_SIZE; | ||
69 | } | ||
70 | } | ||
66 | 71 | ||
67 | if (write_access) | 72 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, |
68 | entry = pte_mkwrite(pte_mkdirty(mk_pte(page, | 73 | pte_t *ptep) |
69 | vma->vm_page_prot))); | 74 | { |
70 | else | 75 | pte_t entry; |
71 | entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); | 76 | int i; |
72 | entry = pte_mkyoung(entry); | ||
73 | mk_pte_huge(entry); | ||
74 | 77 | ||
75 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | 78 | entry = *ptep; |
76 | set_pte(page_table, entry); | ||
77 | page_table++; | ||
78 | 79 | ||
79 | pte_val(entry) += PAGE_SIZE; | 80 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { |
81 | pte_clear(mm, addr, ptep); | ||
82 | addr += PAGE_SIZE; | ||
83 | ptep++; | ||
80 | } | 84 | } |
85 | |||
86 | return entry; | ||
81 | } | 87 | } |
82 | 88 | ||
83 | /* | 89 | /* |
@@ -92,79 +98,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |||
92 | return 0; | 98 | return 0; |
93 | } | 99 | } |
94 | 100 | ||
95 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | ||
96 | struct vm_area_struct *vma) | ||
97 | { | ||
98 | pte_t *src_pte, *dst_pte, entry; | ||
99 | struct page *ptepage; | ||
100 | unsigned long addr = vma->vm_start; | ||
101 | unsigned long end = vma->vm_end; | ||
102 | int i; | ||
103 | |||
104 | while (addr < end) { | ||
105 | dst_pte = huge_pte_alloc(dst, addr); | ||
106 | if (!dst_pte) | ||
107 | goto nomem; | ||
108 | src_pte = huge_pte_offset(src, addr); | ||
109 | BUG_ON(!src_pte || pte_none(*src_pte)); | ||
110 | entry = *src_pte; | ||
111 | ptepage = pte_page(entry); | ||
112 | get_page(ptepage); | ||
113 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | ||
114 | set_pte(dst_pte, entry); | ||
115 | pte_val(entry) += PAGE_SIZE; | ||
116 | dst_pte++; | ||
117 | } | ||
118 | add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); | ||
119 | addr += HPAGE_SIZE; | ||
120 | } | ||
121 | return 0; | ||
122 | |||
123 | nomem: | ||
124 | return -ENOMEM; | ||
125 | } | ||
126 | |||
127 | int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
128 | struct page **pages, struct vm_area_struct **vmas, | ||
129 | unsigned long *position, int *length, int i) | ||
130 | { | ||
131 | unsigned long vaddr = *position; | ||
132 | int remainder = *length; | ||
133 | |||
134 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
135 | |||
136 | while (vaddr < vma->vm_end && remainder) { | ||
137 | if (pages) { | ||
138 | pte_t *pte; | ||
139 | struct page *page; | ||
140 | |||
141 | pte = huge_pte_offset(mm, vaddr); | ||
142 | |||
143 | /* hugetlb should be locked, and hence, prefaulted */ | ||
144 | BUG_ON(!pte || pte_none(*pte)); | ||
145 | |||
146 | page = pte_page(*pte); | ||
147 | |||
148 | WARN_ON(!PageCompound(page)); | ||
149 | |||
150 | get_page(page); | ||
151 | pages[i] = page; | ||
152 | } | ||
153 | |||
154 | if (vmas) | ||
155 | vmas[i] = vma; | ||
156 | |||
157 | vaddr += PAGE_SIZE; | ||
158 | --remainder; | ||
159 | ++i; | ||
160 | } | ||
161 | |||
162 | *length = remainder; | ||
163 | *position = vaddr; | ||
164 | |||
165 | return i; | ||
166 | } | ||
167 | |||
168 | struct page *follow_huge_addr(struct mm_struct *mm, | 101 | struct page *follow_huge_addr(struct mm_struct *mm, |
169 | unsigned long address, int write) | 102 | unsigned long address, int write) |
170 | { | 103 | { |
@@ -181,84 +114,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
181 | { | 114 | { |
182 | return NULL; | 115 | return NULL; |
183 | } | 116 | } |
184 | |||
185 | void unmap_hugepage_range(struct vm_area_struct *vma, | ||
186 | unsigned long start, unsigned long end) | ||
187 | { | ||
188 | struct mm_struct *mm = vma->vm_mm; | ||
189 | unsigned long address; | ||
190 | pte_t *pte; | ||
191 | struct page *page; | ||
192 | int i; | ||
193 | |||
194 | BUG_ON(start & (HPAGE_SIZE - 1)); | ||
195 | BUG_ON(end & (HPAGE_SIZE - 1)); | ||
196 | |||
197 | for (address = start; address < end; address += HPAGE_SIZE) { | ||
198 | pte = huge_pte_offset(mm, address); | ||
199 | BUG_ON(!pte); | ||
200 | if (pte_none(*pte)) | ||
201 | continue; | ||
202 | page = pte_page(*pte); | ||
203 | put_page(page); | ||
204 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | ||
205 | pte_clear(mm, address+(i*PAGE_SIZE), pte); | ||
206 | pte++; | ||
207 | } | ||
208 | } | ||
209 | add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); | ||
210 | flush_tlb_range(vma, start, end); | ||
211 | } | ||
212 | |||
213 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | ||
214 | { | ||
215 | struct mm_struct *mm = current->mm; | ||
216 | unsigned long addr; | ||
217 | int ret = 0; | ||
218 | |||
219 | BUG_ON(vma->vm_start & ~HPAGE_MASK); | ||
220 | BUG_ON(vma->vm_end & ~HPAGE_MASK); | ||
221 | |||
222 | spin_lock(&mm->page_table_lock); | ||
223 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | ||
224 | unsigned long idx; | ||
225 | pte_t *pte = huge_pte_alloc(mm, addr); | ||
226 | struct page *page; | ||
227 | |||
228 | if (!pte) { | ||
229 | ret = -ENOMEM; | ||
230 | goto out; | ||
231 | } | ||
232 | if (!pte_none(*pte)) | ||
233 | continue; | ||
234 | |||
235 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
236 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
237 | page = find_get_page(mapping, idx); | ||
238 | if (!page) { | ||
239 | /* charge the fs quota first */ | ||
240 | if (hugetlb_get_quota(mapping)) { | ||
241 | ret = -ENOMEM; | ||
242 | goto out; | ||
243 | } | ||
244 | page = alloc_huge_page(); | ||
245 | if (!page) { | ||
246 | hugetlb_put_quota(mapping); | ||
247 | ret = -ENOMEM; | ||
248 | goto out; | ||
249 | } | ||
250 | ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); | ||
251 | if (! ret) { | ||
252 | unlock_page(page); | ||
253 | } else { | ||
254 | hugetlb_put_quota(mapping); | ||
255 | free_huge_page(page); | ||
256 | goto out; | ||
257 | } | ||
258 | } | ||
259 | set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); | ||
260 | } | ||
261 | out: | ||
262 | spin_unlock(&mm->page_table_lock); | ||
263 | return ret; | ||
264 | } | ||
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c index bcad2aefa4ee..dcd9c8a8baf8 100644 --- a/arch/sh64/mm/hugetlbpage.c +++ b/arch/sh64/mm/hugetlbpage.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <asm/tlbflush.h> | 24 | #include <asm/tlbflush.h> |
25 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
26 | 26 | ||
27 | static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 27 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
28 | { | 28 | { |
29 | pgd_t *pgd; | 29 | pgd_t *pgd; |
30 | pmd_t *pmd; | 30 | pmd_t *pmd; |
@@ -39,7 +39,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
39 | return pte; | 39 | return pte; |
40 | } | 40 | } |
41 | 41 | ||
42 | static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 42 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
43 | { | 43 | { |
44 | pgd_t *pgd; | 44 | pgd_t *pgd; |
45 | pmd_t *pmd; | 45 | pmd_t *pmd; |
@@ -80,6 +80,20 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, | |||
80 | } | 80 | } |
81 | } | 81 | } |
82 | 82 | ||
83 | pte_t huge_ptep_get_and_clear(pte_t *ptep) | ||
84 | { | ||
85 | pte_t entry; | ||
86 | |||
87 | entry = *ptep; | ||
88 | |||
89 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | ||
90 | pte_clear(pte); | ||
91 | pte++; | ||
92 | } | ||
93 | |||
94 | return entry; | ||
95 | } | ||
96 | |||
83 | /* | 97 | /* |
84 | * This function checks for proper alignment of input addr and len parameters. | 98 | * This function checks for proper alignment of input addr and len parameters. |
85 | */ | 99 | */ |
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 5a1f831b2de1..625cbb336a23 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <asm/cacheflush.h> | 22 | #include <asm/cacheflush.h> |
23 | #include <asm/mmu_context.h> | 23 | #include <asm/mmu_context.h> |
24 | 24 | ||
25 | static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 25 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
26 | { | 26 | { |
27 | pgd_t *pgd; | 27 | pgd_t *pgd; |
28 | pud_t *pud; | 28 | pud_t *pud; |
@@ -41,7 +41,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
41 | return pte; | 41 | return pte; |
42 | } | 42 | } |
43 | 43 | ||
44 | static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 44 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
45 | { | 45 | { |
46 | pgd_t *pgd; | 46 | pgd_t *pgd; |
47 | pud_t *pud; | 47 | pud_t *pud; |
@@ -62,30 +62,34 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
62 | 62 | ||
63 | #define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) | 63 | #define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) |
64 | 64 | ||
65 | static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, | 65 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
66 | unsigned long addr, | 66 | pte_t *ptep, pte_t entry) |
67 | struct page *page, pte_t * page_table, int write_access) | ||
68 | { | 67 | { |
69 | unsigned long i; | 68 | int i; |
70 | pte_t entry; | 69 | |
70 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | ||
71 | set_pte_at(mm, addr, ptep, entry); | ||
72 | ptep++; | ||
73 | addr += PAGE_SIZE; | ||
74 | pte_val(entry) += PAGE_SIZE; | ||
75 | } | ||
76 | } | ||
71 | 77 | ||
72 | add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); | 78 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, |
79 | pte_t *ptep) | ||
80 | { | ||
81 | pte_t entry; | ||
82 | int i; | ||
73 | 83 | ||
74 | if (write_access) | 84 | entry = *ptep; |
75 | entry = pte_mkwrite(pte_mkdirty(mk_pte(page, | ||
76 | vma->vm_page_prot))); | ||
77 | else | ||
78 | entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); | ||
79 | entry = pte_mkyoung(entry); | ||
80 | mk_pte_huge(entry); | ||
81 | 85 | ||
82 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | 86 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { |
83 | set_pte_at(mm, addr, page_table, entry); | 87 | pte_clear(mm, addr, ptep); |
84 | page_table++; | ||
85 | addr += PAGE_SIZE; | 88 | addr += PAGE_SIZE; |
86 | 89 | ptep++; | |
87 | pte_val(entry) += PAGE_SIZE; | ||
88 | } | 90 | } |
91 | |||
92 | return entry; | ||
89 | } | 93 | } |
90 | 94 | ||
91 | /* | 95 | /* |
@@ -100,79 +104,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |||
100 | return 0; | 104 | return 0; |
101 | } | 105 | } |
102 | 106 | ||
103 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | ||
104 | struct vm_area_struct *vma) | ||
105 | { | ||
106 | pte_t *src_pte, *dst_pte, entry; | ||
107 | struct page *ptepage; | ||
108 | unsigned long addr = vma->vm_start; | ||
109 | unsigned long end = vma->vm_end; | ||
110 | int i; | ||
111 | |||
112 | while (addr < end) { | ||
113 | dst_pte = huge_pte_alloc(dst, addr); | ||
114 | if (!dst_pte) | ||
115 | goto nomem; | ||
116 | src_pte = huge_pte_offset(src, addr); | ||
117 | BUG_ON(!src_pte || pte_none(*src_pte)); | ||
118 | entry = *src_pte; | ||
119 | ptepage = pte_page(entry); | ||
120 | get_page(ptepage); | ||
121 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | ||
122 | set_pte_at(dst, addr, dst_pte, entry); | ||
123 | pte_val(entry) += PAGE_SIZE; | ||
124 | dst_pte++; | ||
125 | addr += PAGE_SIZE; | ||
126 | } | ||
127 | add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); | ||
128 | } | ||
129 | return 0; | ||
130 | |||
131 | nomem: | ||
132 | return -ENOMEM; | ||
133 | } | ||
134 | |||
135 | int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
136 | struct page **pages, struct vm_area_struct **vmas, | ||
137 | unsigned long *position, int *length, int i) | ||
138 | { | ||
139 | unsigned long vaddr = *position; | ||
140 | int remainder = *length; | ||
141 | |||
142 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
143 | |||
144 | while (vaddr < vma->vm_end && remainder) { | ||
145 | if (pages) { | ||
146 | pte_t *pte; | ||
147 | struct page *page; | ||
148 | |||
149 | pte = huge_pte_offset(mm, vaddr); | ||
150 | |||
151 | /* hugetlb should be locked, and hence, prefaulted */ | ||
152 | BUG_ON(!pte || pte_none(*pte)); | ||
153 | |||
154 | page = pte_page(*pte); | ||
155 | |||
156 | WARN_ON(!PageCompound(page)); | ||
157 | |||
158 | get_page(page); | ||
159 | pages[i] = page; | ||
160 | } | ||
161 | |||
162 | if (vmas) | ||
163 | vmas[i] = vma; | ||
164 | |||
165 | vaddr += PAGE_SIZE; | ||
166 | --remainder; | ||
167 | ++i; | ||
168 | } | ||
169 | |||
170 | *length = remainder; | ||
171 | *position = vaddr; | ||
172 | |||
173 | return i; | ||
174 | } | ||
175 | |||
176 | struct page *follow_huge_addr(struct mm_struct *mm, | 107 | struct page *follow_huge_addr(struct mm_struct *mm, |
177 | unsigned long address, int write) | 108 | unsigned long address, int write) |
178 | { | 109 | { |
@@ -190,34 +121,6 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
190 | return NULL; | 121 | return NULL; |
191 | } | 122 | } |
192 | 123 | ||
193 | void unmap_hugepage_range(struct vm_area_struct *vma, | ||
194 | unsigned long start, unsigned long end) | ||
195 | { | ||
196 | struct mm_struct *mm = vma->vm_mm; | ||
197 | unsigned long address; | ||
198 | pte_t *pte; | ||
199 | struct page *page; | ||
200 | int i; | ||
201 | |||
202 | BUG_ON(start & (HPAGE_SIZE - 1)); | ||
203 | BUG_ON(end & (HPAGE_SIZE - 1)); | ||
204 | |||
205 | for (address = start; address < end; address += HPAGE_SIZE) { | ||
206 | pte = huge_pte_offset(mm, address); | ||
207 | BUG_ON(!pte); | ||
208 | if (pte_none(*pte)) | ||
209 | continue; | ||
210 | page = pte_page(*pte); | ||
211 | put_page(page); | ||
212 | for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { | ||
213 | pte_clear(mm, address+(i*PAGE_SIZE), pte); | ||
214 | pte++; | ||
215 | } | ||
216 | } | ||
217 | add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); | ||
218 | flush_tlb_range(vma, start, end); | ||
219 | } | ||
220 | |||
221 | static void context_reload(void *__data) | 124 | static void context_reload(void *__data) |
222 | { | 125 | { |
223 | struct mm_struct *mm = __data; | 126 | struct mm_struct *mm = __data; |
@@ -226,12 +129,8 @@ static void context_reload(void *__data) | |||
226 | load_secondary_context(mm); | 129 | load_secondary_context(mm); |
227 | } | 130 | } |
228 | 131 | ||
229 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | 132 | void hugetlb_prefault_arch_hook(struct mm_struct *mm) |
230 | { | 133 | { |
231 | struct mm_struct *mm = current->mm; | ||
232 | unsigned long addr; | ||
233 | int ret = 0; | ||
234 | |||
235 | /* On UltraSPARC-III+ and later, configure the second half of | 134 | /* On UltraSPARC-III+ and later, configure the second half of |
236 | * the Data-TLB for huge pages. | 135 | * the Data-TLB for huge pages. |
237 | */ | 136 | */ |
@@ -261,50 +160,4 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | |||
261 | } | 160 | } |
262 | spin_unlock(&ctx_alloc_lock); | 161 | spin_unlock(&ctx_alloc_lock); |
263 | } | 162 | } |
264 | |||
265 | BUG_ON(vma->vm_start & ~HPAGE_MASK); | ||
266 | BUG_ON(vma->vm_end & ~HPAGE_MASK); | ||
267 | |||
268 | spin_lock(&mm->page_table_lock); | ||
269 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | ||
270 | unsigned long idx; | ||
271 | pte_t *pte = huge_pte_alloc(mm, addr); | ||
272 | struct page *page; | ||
273 | |||
274 | if (!pte) { | ||
275 | ret = -ENOMEM; | ||
276 | goto out; | ||
277 | } | ||
278 | if (!pte_none(*pte)) | ||
279 | continue; | ||
280 | |||
281 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
282 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
283 | page = find_get_page(mapping, idx); | ||
284 | if (!page) { | ||
285 | /* charge the fs quota first */ | ||
286 | if (hugetlb_get_quota(mapping)) { | ||
287 | ret = -ENOMEM; | ||
288 | goto out; | ||
289 | } | ||
290 | page = alloc_huge_page(); | ||
291 | if (!page) { | ||
292 | hugetlb_put_quota(mapping); | ||
293 | ret = -ENOMEM; | ||
294 | goto out; | ||
295 | } | ||
296 | ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); | ||
297 | if (! ret) { | ||
298 | unlock_page(page); | ||
299 | } else { | ||
300 | hugetlb_put_quota(mapping); | ||
301 | free_huge_page(page); | ||
302 | goto out; | ||
303 | } | ||
304 | } | ||
305 | set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE); | ||
306 | } | ||
307 | out: | ||
308 | spin_unlock(&mm->page_table_lock); | ||
309 | return ret; | ||
310 | } | 163 | } |
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index ed13969fa2d6..41400d342d44 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h | |||
@@ -68,6 +68,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; | |||
68 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | 68 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) |
69 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | 69 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
70 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | 70 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA |
71 | #define ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE | ||
71 | #endif | 72 | #endif |
72 | 73 | ||
73 | #define pgd_val(x) ((x).pgd) | 74 | #define pgd_val(x) ((x).pgd) |
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 8d60c2b4b003..e9efe148fdf7 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h | |||
@@ -236,6 +236,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return | |||
236 | static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } | 236 | static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } |
237 | static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } | 237 | static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } |
238 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } | 238 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } |
239 | static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; } | ||
239 | 240 | ||
240 | #ifdef CONFIG_X86_PAE | 241 | #ifdef CONFIG_X86_PAE |
241 | # include <asm/pgtable-3level.h> | 242 | # include <asm/pgtable-3level.h> |
@@ -275,7 +276,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, | |||
275 | */ | 276 | */ |
276 | 277 | ||
277 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) | 278 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) |
278 | #define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE) | ||
279 | 279 | ||
280 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | 280 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) |
281 | { | 281 | { |
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index fcc9c3344ab4..48586e08f432 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h | |||
@@ -283,6 +283,7 @@ ia64_phys_addr_valid (unsigned long addr) | |||
283 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) | 283 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) |
284 | #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) | 284 | #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) |
285 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) | 285 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) |
286 | #define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P)) | ||
286 | 287 | ||
287 | /* | 288 | /* |
288 | * Macro to a page protection value as "uncacheable". Note that "protection" is really a | 289 | * Macro to a page protection value as "uncacheable". Note that "protection" is really a |
diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h index 4c6d129e7d91..180467be8e7b 100644 --- a/include/asm-sh/page.h +++ b/include/asm-sh/page.h | |||
@@ -31,6 +31,7 @@ | |||
31 | #define HPAGE_SIZE (1UL << HPAGE_SHIFT) | 31 | #define HPAGE_SIZE (1UL << HPAGE_SHIFT) |
32 | #define HPAGE_MASK (~(HPAGE_SIZE-1)) | 32 | #define HPAGE_MASK (~(HPAGE_SIZE-1)) |
33 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) | 33 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) |
34 | #define ARCH_HAS_SETCLEAR_HUGE_PTE | ||
34 | #endif | 35 | #endif |
35 | 36 | ||
36 | #ifdef __KERNEL__ | 37 | #ifdef __KERNEL__ |
diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h index cd847a47a9aa..ecb909572d3f 100644 --- a/include/asm-sh/pgtable.h +++ b/include/asm-sh/pgtable.h | |||
@@ -196,6 +196,7 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _ | |||
196 | static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } | 196 | static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } |
197 | static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } | 197 | static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } |
198 | static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } | 198 | static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } |
199 | static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SZHUGE)); return pte; } | ||
199 | 200 | ||
200 | /* | 201 | /* |
201 | * Macro and implementation to make a page protection as uncachable. | 202 | * Macro and implementation to make a page protection as uncachable. |
diff --git a/include/asm-sh64/page.h b/include/asm-sh64/page.h index e1f7f5a41210..d6167f1c0e99 100644 --- a/include/asm-sh64/page.h +++ b/include/asm-sh64/page.h | |||
@@ -41,6 +41,7 @@ | |||
41 | #define HPAGE_SIZE (1UL << HPAGE_SHIFT) | 41 | #define HPAGE_SIZE (1UL << HPAGE_SHIFT) |
42 | #define HPAGE_MASK (~(HPAGE_SIZE-1)) | 42 | #define HPAGE_MASK (~(HPAGE_SIZE-1)) |
43 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) | 43 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) |
44 | #define ARCH_HAS_SETCLEAR_HUGE_PTE | ||
44 | #endif | 45 | #endif |
45 | 46 | ||
46 | #ifdef __KERNEL__ | 47 | #ifdef __KERNEL__ |
diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h index 525e1523ef5f..78ac6be2d9ef 100644 --- a/include/asm-sh64/pgtable.h +++ b/include/asm-sh64/pgtable.h | |||
@@ -430,6 +430,8 @@ extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | | |||
430 | extern inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE)); return pte; } | 430 | extern inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE)); return pte; } |
431 | extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } | 431 | extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } |
432 | extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } | 432 | extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } |
433 | extern inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SZHUGE)); return pte; } | ||
434 | |||
433 | 435 | ||
434 | /* | 436 | /* |
435 | * Conversion functions: convert a page and protection to a page entry. | 437 | * Conversion functions: convert a page and protection to a page entry. |
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index 219ea043a14a..b87dbbd64bc9 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h | |||
@@ -95,6 +95,8 @@ typedef unsigned long pgprot_t; | |||
95 | #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) | 95 | #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) |
96 | #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) | 96 | #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) |
97 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | 97 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
98 | #define ARCH_HAS_SETCLEAR_HUGE_PTE | ||
99 | #define ARCH_HAS_HUGETLB_PREFAULT_HOOK | ||
98 | #endif | 100 | #endif |
99 | 101 | ||
100 | #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ | 102 | #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ |
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index ae2cd5b09a7c..1ae00c5087f1 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h | |||
@@ -286,6 +286,7 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) | |||
286 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R)) | 286 | #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R)) |
287 | #define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE)) | 287 | #define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE)) |
288 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W)) | 288 | #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W)) |
289 | #define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_SZHUGE)) | ||
289 | 290 | ||
290 | /* to find an entry in a page-table-directory. */ | 291 | /* to find an entry in a page-table-directory. */ |
291 | #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) | 292 | #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) |
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index f43048035a03..9ce338c3a71e 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) | 28 | #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) |
29 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | 29 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) |
30 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | 30 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
31 | #define ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE | ||
31 | 32 | ||
32 | #ifdef __KERNEL__ | 33 | #ifdef __KERNEL__ |
33 | #ifndef __ASSEMBLY__ | 34 | #ifndef __ASSEMBLY__ |
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index db2a0efbf573..4eec176c3c39 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h | |||
@@ -253,6 +253,7 @@ extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; | |||
253 | extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } | 253 | extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } |
254 | static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } | 254 | static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } |
255 | 255 | ||
256 | #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) | ||
256 | extern inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } | 257 | extern inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } |
257 | extern inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } | 258 | extern inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; } |
258 | extern inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; } | 259 | extern inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; } |
@@ -263,6 +264,7 @@ extern inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _ | |||
263 | extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } | 264 | extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } |
264 | extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } | 265 | extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } |
265 | extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } | 266 | extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } |
267 | extern inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; } | ||
266 | 268 | ||
267 | struct vm_area_struct; | 269 | struct vm_area_struct; |
268 | 270 | ||
@@ -290,7 +292,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, | |||
290 | */ | 292 | */ |
291 | #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) | 293 | #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) |
292 | 294 | ||
293 | #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) | ||
294 | static inline int pmd_large(pmd_t pte) { | 295 | static inline int pmd_large(pmd_t pte) { |
295 | return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; | 296 | return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; |
296 | } | 297 | } |
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6af1ae4a8211..f529d1442815 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #ifdef CONFIG_HUGETLB_PAGE | 4 | #ifdef CONFIG_HUGETLB_PAGE |
5 | 5 | ||
6 | #include <linux/mempolicy.h> | 6 | #include <linux/mempolicy.h> |
7 | #include <asm/tlbflush.h> | ||
7 | 8 | ||
8 | struct ctl_table; | 9 | struct ctl_table; |
9 | 10 | ||
@@ -22,12 +23,6 @@ int hugetlb_report_meminfo(char *); | |||
22 | int hugetlb_report_node_meminfo(int, char *); | 23 | int hugetlb_report_node_meminfo(int, char *); |
23 | int is_hugepage_mem_enough(size_t); | 24 | int is_hugepage_mem_enough(size_t); |
24 | unsigned long hugetlb_total_pages(void); | 25 | unsigned long hugetlb_total_pages(void); |
25 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | ||
26 | int write); | ||
27 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
28 | pmd_t *pmd, int write); | ||
29 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len); | ||
30 | int pmd_huge(pmd_t pmd); | ||
31 | struct page *alloc_huge_page(void); | 26 | struct page *alloc_huge_page(void); |
32 | void free_huge_page(struct page *); | 27 | void free_huge_page(struct page *); |
33 | 28 | ||
@@ -35,6 +30,17 @@ extern unsigned long max_huge_pages; | |||
35 | extern const unsigned long hugetlb_zero, hugetlb_infinity; | 30 | extern const unsigned long hugetlb_zero, hugetlb_infinity; |
36 | extern int sysctl_hugetlb_shm_group; | 31 | extern int sysctl_hugetlb_shm_group; |
37 | 32 | ||
33 | /* arch callbacks */ | ||
34 | |||
35 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr); | ||
36 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); | ||
37 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | ||
38 | int write); | ||
39 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
40 | pmd_t *pmd, int write); | ||
41 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len); | ||
42 | int pmd_huge(pmd_t pmd); | ||
43 | |||
38 | #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE | 44 | #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE |
39 | #define is_hugepage_only_range(mm, addr, len) 0 | 45 | #define is_hugepage_only_range(mm, addr, len) 0 |
40 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | 46 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ |
@@ -48,6 +54,28 @@ extern int sysctl_hugetlb_shm_group; | |||
48 | int prepare_hugepage_range(unsigned long addr, unsigned long len); | 54 | int prepare_hugepage_range(unsigned long addr, unsigned long len); |
49 | #endif | 55 | #endif |
50 | 56 | ||
57 | #ifndef ARCH_HAS_SETCLEAR_HUGE_PTE | ||
58 | #define set_huge_pte_at(mm, addr, ptep, pte) set_pte_at(mm, addr, ptep, pte) | ||
59 | #define huge_ptep_get_and_clear(mm, addr, ptep) ptep_get_and_clear(mm, addr, ptep) | ||
60 | #else | ||
61 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | ||
62 | pte_t *ptep, pte_t pte); | ||
63 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||
64 | pte_t *ptep); | ||
65 | #endif | ||
66 | |||
67 | #ifndef ARCH_HAS_HUGETLB_PREFAULT_HOOK | ||
68 | #define hugetlb_prefault_arch_hook(mm) do { } while (0) | ||
69 | #else | ||
70 | void hugetlb_prefault_arch_hook(struct mm_struct *mm); | ||
71 | #endif | ||
72 | |||
73 | #ifndef ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE | ||
74 | #define hugetlb_clean_stale_pgtable(pte) BUG() | ||
75 | #else | ||
76 | void hugetlb_clean_stale_pgtable(pte_t *pte); | ||
77 | #endif | ||
78 | |||
51 | #else /* !CONFIG_HUGETLB_PAGE */ | 79 | #else /* !CONFIG_HUGETLB_PAGE */ |
52 | 80 | ||
53 | static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) | 81 | static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4eb5ae3fbe10..fbd1111ea119 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -7,10 +7,14 @@ | |||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/hugetlb.h> | ||
11 | #include <linux/sysctl.h> | 10 | #include <linux/sysctl.h> |
12 | #include <linux/highmem.h> | 11 | #include <linux/highmem.h> |
13 | #include <linux/nodemask.h> | 12 | #include <linux/nodemask.h> |
13 | #include <linux/pagemap.h> | ||
14 | #include <asm/page.h> | ||
15 | #include <asm/pgtable.h> | ||
16 | |||
17 | #include <linux/hugetlb.h> | ||
14 | 18 | ||
15 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 19 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
16 | static unsigned long nr_huge_pages, free_huge_pages; | 20 | static unsigned long nr_huge_pages, free_huge_pages; |
@@ -249,6 +253,72 @@ struct vm_operations_struct hugetlb_vm_ops = { | |||
249 | .nopage = hugetlb_nopage, | 253 | .nopage = hugetlb_nopage, |
250 | }; | 254 | }; |
251 | 255 | ||
256 | static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page) | ||
257 | { | ||
258 | pte_t entry; | ||
259 | |||
260 | if (vma->vm_flags & VM_WRITE) { | ||
261 | entry = | ||
262 | pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); | ||
263 | } else { | ||
264 | entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); | ||
265 | } | ||
266 | entry = pte_mkyoung(entry); | ||
267 | entry = pte_mkhuge(entry); | ||
268 | |||
269 | return entry; | ||
270 | } | ||
271 | |||
272 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | ||
273 | struct vm_area_struct *vma) | ||
274 | { | ||
275 | pte_t *src_pte, *dst_pte, entry; | ||
276 | struct page *ptepage; | ||
277 | unsigned long addr = vma->vm_start; | ||
278 | unsigned long end = vma->vm_end; | ||
279 | |||
280 | while (addr < end) { | ||
281 | dst_pte = huge_pte_alloc(dst, addr); | ||
282 | if (!dst_pte) | ||
283 | goto nomem; | ||
284 | src_pte = huge_pte_offset(src, addr); | ||
285 | BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */ | ||
286 | entry = *src_pte; | ||
287 | ptepage = pte_page(entry); | ||
288 | get_page(ptepage); | ||
289 | add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); | ||
290 | set_huge_pte_at(dst, addr, dst_pte, entry); | ||
291 | addr += HPAGE_SIZE; | ||
292 | } | ||
293 | return 0; | ||
294 | |||
295 | nomem: | ||
296 | return -ENOMEM; | ||
297 | } | ||
298 | |||
299 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | ||
300 | unsigned long end) | ||
301 | { | ||
302 | struct mm_struct *mm = vma->vm_mm; | ||
303 | unsigned long address; | ||
304 | pte_t pte; | ||
305 | struct page *page; | ||
306 | |||
307 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
308 | BUG_ON(start & ~HPAGE_MASK); | ||
309 | BUG_ON(end & ~HPAGE_MASK); | ||
310 | |||
311 | for (address = start; address < end; address += HPAGE_SIZE) { | ||
312 | pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address)); | ||
313 | if (pte_none(pte)) | ||
314 | continue; | ||
315 | page = pte_page(pte); | ||
316 | put_page(page); | ||
317 | } | ||
318 | add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); | ||
319 | flush_tlb_range(vma, start, end); | ||
320 | } | ||
321 | |||
252 | void zap_hugepage_range(struct vm_area_struct *vma, | 322 | void zap_hugepage_range(struct vm_area_struct *vma, |
253 | unsigned long start, unsigned long length) | 323 | unsigned long start, unsigned long length) |
254 | { | 324 | { |
@@ -258,3 +328,108 @@ void zap_hugepage_range(struct vm_area_struct *vma, | |||
258 | unmap_hugepage_range(vma, start, start + length); | 328 | unmap_hugepage_range(vma, start, start + length); |
259 | spin_unlock(&mm->page_table_lock); | 329 | spin_unlock(&mm->page_table_lock); |
260 | } | 330 | } |
331 | |||
332 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | ||
333 | { | ||
334 | struct mm_struct *mm = current->mm; | ||
335 | unsigned long addr; | ||
336 | int ret = 0; | ||
337 | |||
338 | WARN_ON(!is_vm_hugetlb_page(vma)); | ||
339 | BUG_ON(vma->vm_start & ~HPAGE_MASK); | ||
340 | BUG_ON(vma->vm_end & ~HPAGE_MASK); | ||
341 | |||
342 | hugetlb_prefault_arch_hook(mm); | ||
343 | |||
344 | spin_lock(&mm->page_table_lock); | ||
345 | for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { | ||
346 | unsigned long idx; | ||
347 | pte_t *pte = huge_pte_alloc(mm, addr); | ||
348 | struct page *page; | ||
349 | |||
350 | if (!pte) { | ||
351 | ret = -ENOMEM; | ||
352 | goto out; | ||
353 | } | ||
354 | if (! pte_none(*pte)) | ||
355 | hugetlb_clean_stale_pgtable(pte); | ||
356 | |||
357 | idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) | ||
358 | + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | ||
359 | page = find_get_page(mapping, idx); | ||
360 | if (!page) { | ||
361 | /* charge the fs quota first */ | ||
362 | if (hugetlb_get_quota(mapping)) { | ||
363 | ret = -ENOMEM; | ||
364 | goto out; | ||
365 | } | ||
366 | page = alloc_huge_page(); | ||
367 | if (!page) { | ||
368 | hugetlb_put_quota(mapping); | ||
369 | ret = -ENOMEM; | ||
370 | goto out; | ||
371 | } | ||
372 | ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); | ||
373 | if (! ret) { | ||
374 | unlock_page(page); | ||
375 | } else { | ||
376 | hugetlb_put_quota(mapping); | ||
377 | free_huge_page(page); | ||
378 | goto out; | ||
379 | } | ||
380 | } | ||
381 | add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); | ||
382 | set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page)); | ||
383 | } | ||
384 | out: | ||
385 | spin_unlock(&mm->page_table_lock); | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
390 | struct page **pages, struct vm_area_struct **vmas, | ||
391 | unsigned long *position, int *length, int i) | ||
392 | { | ||
393 | unsigned long vpfn, vaddr = *position; | ||
394 | int remainder = *length; | ||
395 | |||
396 | BUG_ON(!is_vm_hugetlb_page(vma)); | ||
397 | |||
398 | vpfn = vaddr/PAGE_SIZE; | ||
399 | while (vaddr < vma->vm_end && remainder) { | ||
400 | |||
401 | if (pages) { | ||
402 | pte_t *pte; | ||
403 | struct page *page; | ||
404 | |||
405 | /* Some archs (sparc64, sh*) have multiple | ||
406 | * pte_ts to each hugepage. We have to make | ||
407 | * sure we get the first, for the page | ||
408 | * indexing below to work. */ | ||
409 | pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); | ||
410 | |||
411 | /* hugetlb should be locked, and hence, prefaulted */ | ||
412 | WARN_ON(!pte || pte_none(*pte)); | ||
413 | |||
414 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | ||
415 | |||
416 | WARN_ON(!PageCompound(page)); | ||
417 | |||
418 | get_page(page); | ||
419 | pages[i] = page; | ||
420 | } | ||
421 | |||
422 | if (vmas) | ||
423 | vmas[i] = vma; | ||
424 | |||
425 | vaddr += PAGE_SIZE; | ||
426 | ++vpfn; | ||
427 | --remainder; | ||
428 | ++i; | ||
429 | } | ||
430 | |||
431 | *length = remainder; | ||
432 | *position = vaddr; | ||
433 | |||
434 | return i; | ||
435 | } | ||