diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
commit | 8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch) | |
tree | 8129b5907161bc6ae26deb3645ce1e280c5e1f51 /arch/x86/mm | |
parent | b2139aa0eec330c711c5a279db361e5ef1178e78 (diff) | |
parent | 30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff) |
Merge commit 'v2.6.27-rc3' into x86/prototypes
Conflicts:
include/asm-x86/dma-mapping.h
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/mm/discontig_32.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 10 | ||||
-rw-r--r-- | arch/x86/mm/gup.c | 298 | ||||
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 78 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 37 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/pgtable_32.c | 47 |
10 files changed, 382 insertions, 108 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 1fbb844c3d7a..dfb932dcf136 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
2 | pat.o pgtable.o | 2 | pat.o pgtable.o gup.o |
3 | 3 | ||
4 | obj-$(CONFIG_X86_32) += pgtable_32.o | 4 | obj-$(CONFIG_X86_32) += pgtable_32.o |
5 | 5 | ||
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 5dfef9fa061a..62fa440678d8 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -42,7 +42,6 @@ | |||
42 | 42 | ||
43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
44 | EXPORT_SYMBOL(node_data); | 44 | EXPORT_SYMBOL(node_data); |
45 | static bootmem_data_t node0_bdata; | ||
46 | 45 | ||
47 | /* | 46 | /* |
48 | * numa interface - we expect the numa architecture specific code to have | 47 | * numa interface - we expect the numa architecture specific code to have |
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn, | |||
385 | for_each_online_node(nid) | 384 | for_each_online_node(nid) |
386 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 385 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
387 | 386 | ||
388 | NODE_DATA(0)->bdata = &node0_bdata; | 387 | NODE_DATA(0)->bdata = &bootmem_node_data[0]; |
389 | setup_bootmem_allocator(); | 388 | setup_bootmem_allocator(); |
390 | } | 389 | } |
391 | 390 | ||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0bb0caed8971..a20d1fa64b4e 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
148 | * we have now. "break" is either changing perms, levels or | 148 | * we have now. "break" is either changing perms, levels or |
149 | * address space marker. | 149 | * address space marker. |
150 | */ | 150 | */ |
151 | prot = pgprot_val(new_prot) & ~(PTE_MASK); | 151 | prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK); |
152 | cur = pgprot_val(st->current_prot) & ~(PTE_MASK); | 152 | cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK); |
153 | 153 | ||
154 | if (!st->level) { | 154 | if (!st->level) { |
155 | /* First entry */ | 155 | /* First entry */ |
@@ -221,7 +221,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, | |||
221 | for (i = 0; i < PTRS_PER_PMD; i++) { | 221 | for (i = 0; i < PTRS_PER_PMD; i++) { |
222 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); | 222 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); |
223 | if (!pmd_none(*start)) { | 223 | if (!pmd_none(*start)) { |
224 | pgprotval_t prot = pmd_val(*start) & ~PTE_MASK; | 224 | pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; |
225 | 225 | ||
226 | if (pmd_large(*start) || !pmd_present(*start)) | 226 | if (pmd_large(*start) || !pmd_present(*start)) |
227 | note_page(m, st, __pgprot(prot), 3); | 227 | note_page(m, st, __pgprot(prot), 3); |
@@ -253,7 +253,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, | |||
253 | for (i = 0; i < PTRS_PER_PUD; i++) { | 253 | for (i = 0; i < PTRS_PER_PUD; i++) { |
254 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); | 254 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); |
255 | if (!pud_none(*start)) { | 255 | if (!pud_none(*start)) { |
256 | pgprotval_t prot = pud_val(*start) & ~PTE_MASK; | 256 | pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; |
257 | 257 | ||
258 | if (pud_large(*start) || !pud_present(*start)) | 258 | if (pud_large(*start) || !pud_present(*start)) |
259 | note_page(m, st, __pgprot(prot), 2); | 259 | note_page(m, st, __pgprot(prot), 2); |
@@ -288,7 +288,7 @@ static void walk_pgd_level(struct seq_file *m) | |||
288 | for (i = 0; i < PTRS_PER_PGD; i++) { | 288 | for (i = 0; i < PTRS_PER_PGD; i++) { |
289 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); | 289 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
290 | if (!pgd_none(*start)) { | 290 | if (!pgd_none(*start)) { |
291 | pgprotval_t prot = pgd_val(*start) & ~PTE_MASK; | 291 | pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; |
292 | 292 | ||
293 | if (pgd_large(*start) || !pgd_present(*start)) | 293 | if (pgd_large(*start) || !pgd_present(*start)) |
294 | note_page(m, &st, __pgprot(prot), 1); | 294 | note_page(m, &st, __pgprot(prot), 1); |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c new file mode 100644 index 000000000000..007bb06c7504 --- /dev/null +++ b/arch/x86/mm/gup.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * Lockless get_user_pages_fast for x86 | ||
3 | * | ||
4 | * Copyright (C) 2008 Nick Piggin | ||
5 | * Copyright (C) 2008 Novell Inc. | ||
6 | */ | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/vmstat.h> | ||
10 | #include <linux/highmem.h> | ||
11 | |||
12 | #include <asm/pgtable.h> | ||
13 | |||
14 | static inline pte_t gup_get_pte(pte_t *ptep) | ||
15 | { | ||
16 | #ifndef CONFIG_X86_PAE | ||
17 | return *ptep; | ||
18 | #else | ||
19 | /* | ||
20 | * With get_user_pages_fast, we walk down the pagetables without taking | ||
21 | * any locks. For this we would like to load the pointers atomically, | ||
22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What | ||
23 | * we do have is the guarantee that a pte will only either go from not | ||
24 | * present to present, or present to not present or both -- it will not | ||
25 | * switch to a completely different present page without a TLB flush in | ||
26 | * between; something that we are blocking by holding interrupts off. | ||
27 | * | ||
28 | * Setting ptes from not present to present goes: | ||
29 | * ptep->pte_high = h; | ||
30 | * smp_wmb(); | ||
31 | * ptep->pte_low = l; | ||
32 | * | ||
33 | * And present to not present goes: | ||
34 | * ptep->pte_low = 0; | ||
35 | * smp_wmb(); | ||
36 | * ptep->pte_high = 0; | ||
37 | * | ||
38 | * We must ensure here that the load of pte_low sees l iff pte_high | ||
39 | * sees h. We load pte_high *after* loading pte_low, which ensures we | ||
40 | * don't see an older value of pte_high. *Then* we recheck pte_low, | ||
41 | * which ensures that we haven't picked up a changed pte high. We might | ||
42 | * have got rubbish values from pte_low and pte_high, but we are | ||
43 | * guaranteed that pte_low will not have the present bit set *unless* | ||
44 | * it is 'l'. And get_user_pages_fast only operates on present ptes, so | ||
45 | * we're safe. | ||
46 | * | ||
47 | * gup_get_pte should not be used or copied outside gup.c without being | ||
48 | * very careful -- it does not atomically load the pte or anything that | ||
49 | * is likely to be useful for you. | ||
50 | */ | ||
51 | pte_t pte; | ||
52 | |||
53 | retry: | ||
54 | pte.pte_low = ptep->pte_low; | ||
55 | smp_rmb(); | ||
56 | pte.pte_high = ptep->pte_high; | ||
57 | smp_rmb(); | ||
58 | if (unlikely(pte.pte_low != ptep->pte_low)) | ||
59 | goto retry; | ||
60 | |||
61 | return pte; | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * The performance critical leaf functions are made noinline otherwise gcc | ||
67 | * inlines everything into a single function which results in too much | ||
68 | * register pressure. | ||
69 | */ | ||
70 | static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | ||
71 | unsigned long end, int write, struct page **pages, int *nr) | ||
72 | { | ||
73 | unsigned long mask; | ||
74 | pte_t *ptep; | ||
75 | |||
76 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
77 | if (write) | ||
78 | mask |= _PAGE_RW; | ||
79 | |||
80 | ptep = pte_offset_map(&pmd, addr); | ||
81 | do { | ||
82 | pte_t pte = gup_get_pte(ptep); | ||
83 | struct page *page; | ||
84 | |||
85 | if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { | ||
86 | pte_unmap(ptep); | ||
87 | return 0; | ||
88 | } | ||
89 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
90 | page = pte_page(pte); | ||
91 | get_page(page); | ||
92 | pages[*nr] = page; | ||
93 | (*nr)++; | ||
94 | |||
95 | } while (ptep++, addr += PAGE_SIZE, addr != end); | ||
96 | pte_unmap(ptep - 1); | ||
97 | |||
98 | return 1; | ||
99 | } | ||
100 | |||
101 | static inline void get_head_page_multiple(struct page *page, int nr) | ||
102 | { | ||
103 | VM_BUG_ON(page != compound_head(page)); | ||
104 | VM_BUG_ON(page_count(page) == 0); | ||
105 | atomic_add(nr, &page->_count); | ||
106 | } | ||
107 | |||
108 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | ||
109 | unsigned long end, int write, struct page **pages, int *nr) | ||
110 | { | ||
111 | unsigned long mask; | ||
112 | pte_t pte = *(pte_t *)&pmd; | ||
113 | struct page *head, *page; | ||
114 | int refs; | ||
115 | |||
116 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
117 | if (write) | ||
118 | mask |= _PAGE_RW; | ||
119 | if ((pte_val(pte) & mask) != mask) | ||
120 | return 0; | ||
121 | /* hugepages are never "special" */ | ||
122 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
123 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
124 | |||
125 | refs = 0; | ||
126 | head = pte_page(pte); | ||
127 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | ||
128 | do { | ||
129 | VM_BUG_ON(compound_head(page) != head); | ||
130 | pages[*nr] = page; | ||
131 | (*nr)++; | ||
132 | page++; | ||
133 | refs++; | ||
134 | } while (addr += PAGE_SIZE, addr != end); | ||
135 | get_head_page_multiple(head, refs); | ||
136 | |||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | ||
141 | int write, struct page **pages, int *nr) | ||
142 | { | ||
143 | unsigned long next; | ||
144 | pmd_t *pmdp; | ||
145 | |||
146 | pmdp = pmd_offset(&pud, addr); | ||
147 | do { | ||
148 | pmd_t pmd = *pmdp; | ||
149 | |||
150 | next = pmd_addr_end(addr, end); | ||
151 | if (pmd_none(pmd)) | ||
152 | return 0; | ||
153 | if (unlikely(pmd_large(pmd))) { | ||
154 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) | ||
155 | return 0; | ||
156 | } else { | ||
157 | if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | ||
158 | return 0; | ||
159 | } | ||
160 | } while (pmdp++, addr = next, addr != end); | ||
161 | |||
162 | return 1; | ||
163 | } | ||
164 | |||
165 | static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | ||
166 | unsigned long end, int write, struct page **pages, int *nr) | ||
167 | { | ||
168 | unsigned long mask; | ||
169 | pte_t pte = *(pte_t *)&pud; | ||
170 | struct page *head, *page; | ||
171 | int refs; | ||
172 | |||
173 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
174 | if (write) | ||
175 | mask |= _PAGE_RW; | ||
176 | if ((pte_val(pte) & mask) != mask) | ||
177 | return 0; | ||
178 | /* hugepages are never "special" */ | ||
179 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
180 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
181 | |||
182 | refs = 0; | ||
183 | head = pte_page(pte); | ||
184 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | ||
185 | do { | ||
186 | VM_BUG_ON(compound_head(page) != head); | ||
187 | pages[*nr] = page; | ||
188 | (*nr)++; | ||
189 | page++; | ||
190 | refs++; | ||
191 | } while (addr += PAGE_SIZE, addr != end); | ||
192 | get_head_page_multiple(head, refs); | ||
193 | |||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | ||
198 | int write, struct page **pages, int *nr) | ||
199 | { | ||
200 | unsigned long next; | ||
201 | pud_t *pudp; | ||
202 | |||
203 | pudp = pud_offset(&pgd, addr); | ||
204 | do { | ||
205 | pud_t pud = *pudp; | ||
206 | |||
207 | next = pud_addr_end(addr, end); | ||
208 | if (pud_none(pud)) | ||
209 | return 0; | ||
210 | if (unlikely(pud_large(pud))) { | ||
211 | if (!gup_huge_pud(pud, addr, next, write, pages, nr)) | ||
212 | return 0; | ||
213 | } else { | ||
214 | if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | ||
215 | return 0; | ||
216 | } | ||
217 | } while (pudp++, addr = next, addr != end); | ||
218 | |||
219 | return 1; | ||
220 | } | ||
221 | |||
222 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
223 | struct page **pages) | ||
224 | { | ||
225 | struct mm_struct *mm = current->mm; | ||
226 | unsigned long addr, len, end; | ||
227 | unsigned long next; | ||
228 | pgd_t *pgdp; | ||
229 | int nr = 0; | ||
230 | |||
231 | start &= PAGE_MASK; | ||
232 | addr = start; | ||
233 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
234 | end = start + len; | ||
235 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
236 | start, len))) | ||
237 | goto slow_irqon; | ||
238 | |||
239 | /* | ||
240 | * XXX: batch / limit 'nr', to avoid large irq off latency | ||
241 | * needs some instrumenting to determine the common sizes used by | ||
242 | * important workloads (eg. DB2), and whether limiting the batch size | ||
243 | * will decrease performance. | ||
244 | * | ||
245 | * It seems like we're in the clear for the moment. Direct-IO is | ||
246 | * the main guy that batches up lots of get_user_pages, and even | ||
247 | * they are limited to 64-at-a-time which is not so many. | ||
248 | */ | ||
249 | /* | ||
250 | * This doesn't prevent pagetable teardown, but does prevent | ||
251 | * the pagetables and pages from being freed on x86. | ||
252 | * | ||
253 | * So long as we atomically load page table pointers versus teardown | ||
254 | * (which we do on x86, with the above PAE exception), we can follow the | ||
255 | * address down to the page and take a ref on it. | ||
256 | */ | ||
257 | local_irq_disable(); | ||
258 | pgdp = pgd_offset(mm, addr); | ||
259 | do { | ||
260 | pgd_t pgd = *pgdp; | ||
261 | |||
262 | next = pgd_addr_end(addr, end); | ||
263 | if (pgd_none(pgd)) | ||
264 | goto slow; | ||
265 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
266 | goto slow; | ||
267 | } while (pgdp++, addr = next, addr != end); | ||
268 | local_irq_enable(); | ||
269 | |||
270 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | ||
271 | return nr; | ||
272 | |||
273 | { | ||
274 | int ret; | ||
275 | |||
276 | slow: | ||
277 | local_irq_enable(); | ||
278 | slow_irqon: | ||
279 | /* Try to get the remaining pages with get_user_pages */ | ||
280 | start += nr << PAGE_SHIFT; | ||
281 | pages += nr; | ||
282 | |||
283 | down_read(&mm->mmap_sem); | ||
284 | ret = get_user_pages(current, mm, start, | ||
285 | (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); | ||
286 | up_read(&mm->mmap_sem); | ||
287 | |||
288 | /* Have to be a bit careful with return values */ | ||
289 | if (nr > 0) { | ||
290 | if (ret < 0) | ||
291 | ret = nr; | ||
292 | else | ||
293 | ret += nr; | ||
294 | } | ||
295 | |||
296 | return ret; | ||
297 | } | ||
298 | } | ||
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 0b3d567e686d..8f307d914c2e 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | |||
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 127 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
128 | unsigned long addr, unsigned long sz) | ||
128 | { | 129 | { |
129 | pgd_t *pgd; | 130 | pgd_t *pgd; |
130 | pud_t *pud; | 131 | pud_t *pud; |
@@ -133,9 +134,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
133 | pgd = pgd_offset(mm, addr); | 134 | pgd = pgd_offset(mm, addr); |
134 | pud = pud_alloc(mm, pgd, addr); | 135 | pud = pud_alloc(mm, pgd, addr); |
135 | if (pud) { | 136 | if (pud) { |
136 | if (pud_none(*pud)) | 137 | if (sz == PUD_SIZE) { |
137 | huge_pmd_share(mm, addr, pud); | 138 | pte = (pte_t *)pud; |
138 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | 139 | } else { |
140 | BUG_ON(sz != PMD_SIZE); | ||
141 | if (pud_none(*pud)) | ||
142 | huge_pmd_share(mm, addr, pud); | ||
143 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | ||
144 | } | ||
139 | } | 145 | } |
140 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); | 146 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); |
141 | 147 | ||
@@ -151,8 +157,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
151 | pgd = pgd_offset(mm, addr); | 157 | pgd = pgd_offset(mm, addr); |
152 | if (pgd_present(*pgd)) { | 158 | if (pgd_present(*pgd)) { |
153 | pud = pud_offset(pgd, addr); | 159 | pud = pud_offset(pgd, addr); |
154 | if (pud_present(*pud)) | 160 | if (pud_present(*pud)) { |
161 | if (pud_large(*pud)) | ||
162 | return (pte_t *)pud; | ||
155 | pmd = pmd_offset(pud, addr); | 163 | pmd = pmd_offset(pud, addr); |
164 | } | ||
156 | } | 165 | } |
157 | return (pte_t *) pmd; | 166 | return (pte_t *) pmd; |
158 | } | 167 | } |
@@ -188,6 +197,11 @@ int pmd_huge(pmd_t pmd) | |||
188 | return 0; | 197 | return 0; |
189 | } | 198 | } |
190 | 199 | ||
200 | int pud_huge(pud_t pud) | ||
201 | { | ||
202 | return 0; | ||
203 | } | ||
204 | |||
191 | struct page * | 205 | struct page * |
192 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 206 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
193 | pmd_t *pmd, int write) | 207 | pmd_t *pmd, int write) |
@@ -208,6 +222,11 @@ int pmd_huge(pmd_t pmd) | |||
208 | return !!(pmd_val(pmd) & _PAGE_PSE); | 222 | return !!(pmd_val(pmd) & _PAGE_PSE); |
209 | } | 223 | } |
210 | 224 | ||
225 | int pud_huge(pud_t pud) | ||
226 | { | ||
227 | return !!(pud_val(pud) & _PAGE_PSE); | ||
228 | } | ||
229 | |||
211 | struct page * | 230 | struct page * |
212 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 231 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
213 | pmd_t *pmd, int write) | 232 | pmd_t *pmd, int write) |
@@ -216,9 +235,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
216 | 235 | ||
217 | page = pte_page(*(pte_t *)pmd); | 236 | page = pte_page(*(pte_t *)pmd); |
218 | if (page) | 237 | if (page) |
219 | page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); | 238 | page += ((address & ~PMD_MASK) >> PAGE_SHIFT); |
239 | return page; | ||
240 | } | ||
241 | |||
242 | struct page * | ||
243 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
244 | pud_t *pud, int write) | ||
245 | { | ||
246 | struct page *page; | ||
247 | |||
248 | page = pte_page(*(pte_t *)pud); | ||
249 | if (page) | ||
250 | page += ((address & ~PUD_MASK) >> PAGE_SHIFT); | ||
220 | return page; | 251 | return page; |
221 | } | 252 | } |
253 | |||
222 | #endif | 254 | #endif |
223 | 255 | ||
224 | /* x86_64 also uses this file */ | 256 | /* x86_64 also uses this file */ |
@@ -228,6 +260,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
228 | unsigned long addr, unsigned long len, | 260 | unsigned long addr, unsigned long len, |
229 | unsigned long pgoff, unsigned long flags) | 261 | unsigned long pgoff, unsigned long flags) |
230 | { | 262 | { |
263 | struct hstate *h = hstate_file(file); | ||
231 | struct mm_struct *mm = current->mm; | 264 | struct mm_struct *mm = current->mm; |
232 | struct vm_area_struct *vma; | 265 | struct vm_area_struct *vma; |
233 | unsigned long start_addr; | 266 | unsigned long start_addr; |
@@ -240,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
240 | } | 273 | } |
241 | 274 | ||
242 | full_search: | 275 | full_search: |
243 | addr = ALIGN(start_addr, HPAGE_SIZE); | 276 | addr = ALIGN(start_addr, huge_page_size(h)); |
244 | 277 | ||
245 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | 278 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { |
246 | /* At this point: (!vma || addr < vma->vm_end). */ | 279 | /* At this point: (!vma || addr < vma->vm_end). */ |
@@ -262,7 +295,7 @@ full_search: | |||
262 | } | 295 | } |
263 | if (addr + mm->cached_hole_size < vma->vm_start) | 296 | if (addr + mm->cached_hole_size < vma->vm_start) |
264 | mm->cached_hole_size = vma->vm_start - addr; | 297 | mm->cached_hole_size = vma->vm_start - addr; |
265 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | 298 | addr = ALIGN(vma->vm_end, huge_page_size(h)); |
266 | } | 299 | } |
267 | } | 300 | } |
268 | 301 | ||
@@ -270,6 +303,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, | |||
270 | unsigned long addr0, unsigned long len, | 303 | unsigned long addr0, unsigned long len, |
271 | unsigned long pgoff, unsigned long flags) | 304 | unsigned long pgoff, unsigned long flags) |
272 | { | 305 | { |
306 | struct hstate *h = hstate_file(file); | ||
273 | struct mm_struct *mm = current->mm; | 307 | struct mm_struct *mm = current->mm; |
274 | struct vm_area_struct *vma, *prev_vma; | 308 | struct vm_area_struct *vma, *prev_vma; |
275 | unsigned long base = mm->mmap_base, addr = addr0; | 309 | unsigned long base = mm->mmap_base, addr = addr0; |
@@ -290,7 +324,7 @@ try_again: | |||
290 | goto fail; | 324 | goto fail; |
291 | 325 | ||
292 | /* either no address requested or cant fit in requested address hole */ | 326 | /* either no address requested or cant fit in requested address hole */ |
293 | addr = (mm->free_area_cache - len) & HPAGE_MASK; | 327 | addr = (mm->free_area_cache - len) & huge_page_mask(h); |
294 | do { | 328 | do { |
295 | /* | 329 | /* |
296 | * Lookup failure means no vma is above this address, | 330 | * Lookup failure means no vma is above this address, |
@@ -321,7 +355,7 @@ try_again: | |||
321 | largest_hole = vma->vm_start - addr; | 355 | largest_hole = vma->vm_start - addr; |
322 | 356 | ||
323 | /* try just below the current vma->vm_start */ | 357 | /* try just below the current vma->vm_start */ |
324 | addr = (vma->vm_start - len) & HPAGE_MASK; | 358 | addr = (vma->vm_start - len) & huge_page_mask(h); |
325 | } while (len <= vma->vm_start); | 359 | } while (len <= vma->vm_start); |
326 | 360 | ||
327 | fail: | 361 | fail: |
@@ -359,22 +393,23 @@ unsigned long | |||
359 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | 393 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
360 | unsigned long len, unsigned long pgoff, unsigned long flags) | 394 | unsigned long len, unsigned long pgoff, unsigned long flags) |
361 | { | 395 | { |
396 | struct hstate *h = hstate_file(file); | ||
362 | struct mm_struct *mm = current->mm; | 397 | struct mm_struct *mm = current->mm; |
363 | struct vm_area_struct *vma; | 398 | struct vm_area_struct *vma; |
364 | 399 | ||
365 | if (len & ~HPAGE_MASK) | 400 | if (len & ~huge_page_mask(h)) |
366 | return -EINVAL; | 401 | return -EINVAL; |
367 | if (len > TASK_SIZE) | 402 | if (len > TASK_SIZE) |
368 | return -ENOMEM; | 403 | return -ENOMEM; |
369 | 404 | ||
370 | if (flags & MAP_FIXED) { | 405 | if (flags & MAP_FIXED) { |
371 | if (prepare_hugepage_range(addr, len)) | 406 | if (prepare_hugepage_range(file, addr, len)) |
372 | return -EINVAL; | 407 | return -EINVAL; |
373 | return addr; | 408 | return addr; |
374 | } | 409 | } |
375 | 410 | ||
376 | if (addr) { | 411 | if (addr) { |
377 | addr = ALIGN(addr, HPAGE_SIZE); | 412 | addr = ALIGN(addr, huge_page_size(h)); |
378 | vma = find_vma(mm, addr); | 413 | vma = find_vma(mm, addr); |
379 | if (TASK_SIZE - len >= addr && | 414 | if (TASK_SIZE - len >= addr && |
380 | (!vma || addr + len <= vma->vm_start)) | 415 | (!vma || addr + len <= vma->vm_start)) |
@@ -390,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
390 | 425 | ||
391 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ | 426 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ |
392 | 427 | ||
428 | #ifdef CONFIG_X86_64 | ||
429 | static __init int setup_hugepagesz(char *opt) | ||
430 | { | ||
431 | unsigned long ps = memparse(opt, &opt); | ||
432 | if (ps == PMD_SIZE) { | ||
433 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); | ||
434 | } else if (ps == PUD_SIZE && cpu_has_gbpages) { | ||
435 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); | ||
436 | } else { | ||
437 | printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", | ||
438 | ps >> 20); | ||
439 | return 0; | ||
440 | } | ||
441 | return 1; | ||
442 | } | ||
443 | __setup("hugepagesz=", setup_hugepagesz); | ||
444 | #endif | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec37121f6709..129618ca0ea2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -86,43 +86,6 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
86 | * around without checking the pgd every time. | 86 | * around without checking the pgd every time. |
87 | */ | 87 | */ |
88 | 88 | ||
89 | void show_mem(void) | ||
90 | { | ||
91 | long i, total = 0, reserved = 0; | ||
92 | long shared = 0, cached = 0; | ||
93 | struct page *page; | ||
94 | pg_data_t *pgdat; | ||
95 | |||
96 | printk(KERN_INFO "Mem-info:\n"); | ||
97 | show_free_areas(); | ||
98 | for_each_online_pgdat(pgdat) { | ||
99 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
100 | /* | ||
101 | * This loop can take a while with 256 GB and | ||
102 | * 4k pages so defer the NMI watchdog: | ||
103 | */ | ||
104 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
105 | touch_nmi_watchdog(); | ||
106 | |||
107 | if (!pfn_valid(pgdat->node_start_pfn + i)) | ||
108 | continue; | ||
109 | |||
110 | page = pfn_to_page(pgdat->node_start_pfn + i); | ||
111 | total++; | ||
112 | if (PageReserved(page)) | ||
113 | reserved++; | ||
114 | else if (PageSwapCache(page)) | ||
115 | cached++; | ||
116 | else if (page_count(page)) | ||
117 | shared += page_count(page) - 1; | ||
118 | } | ||
119 | } | ||
120 | printk(KERN_INFO "%lu pages of RAM\n", total); | ||
121 | printk(KERN_INFO "%lu reserved pages\n", reserved); | ||
122 | printk(KERN_INFO "%lu pages shared\n", shared); | ||
123 | printk(KERN_INFO "%lu pages swap cached\n", cached); | ||
124 | } | ||
125 | |||
126 | int after_bootmem; | 89 | int after_bootmem; |
127 | 90 | ||
128 | static __init void *spp_getpage(void) | 91 | static __init void *spp_getpage(void) |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 19fd9a3c5210..fba57be9af68 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, | |||
330 | return (void __iomem *)ret; | 330 | return (void __iomem *)ret; |
331 | } | 331 | } |
332 | 332 | ||
333 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | ||
334 | unsigned long prot_val) | ||
335 | { | ||
336 | return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), | ||
337 | __builtin_return_address(0)); | ||
338 | } | ||
339 | EXPORT_SYMBOL(ioremap_prot); | ||
340 | |||
333 | /** | 341 | /** |
334 | * iounmap - Free a IO remapping | 342 | * iounmap - Free a IO remapping |
335 | * @addr: virtual address from ioremap_* | 343 | * @addr: virtual address from ioremap_* |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9782f42dd319..a4dd793d6003 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -23,8 +23,6 @@ | |||
23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
24 | EXPORT_SYMBOL(node_data); | 24 | EXPORT_SYMBOL(node_data); |
25 | 25 | ||
26 | static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; | ||
27 | |||
28 | struct memnode memnode; | 26 | struct memnode memnode; |
29 | 27 | ||
30 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 28 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { |
@@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
198 | nodedata_phys + pgdat_size - 1); | 196 | nodedata_phys + pgdat_size - 1); |
199 | 197 | ||
200 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 198 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
201 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; | 199 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; |
202 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 200 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
203 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 201 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
204 | 202 | ||
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 557b2abceef8..d50302774fe2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) | |||
207 | unsigned long addr; | 207 | unsigned long addr; |
208 | int i; | 208 | int i; |
209 | 209 | ||
210 | if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ | ||
211 | return; | ||
212 | |||
210 | pud = pud_offset(pgd, 0); | 213 | pud = pud_offset(pgd, 0); |
211 | 214 | ||
212 | for (addr = i = 0; i < PREALLOCATED_PMDS; | 215 | for (addr = i = 0; i < PREALLOCATED_PMDS; |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index b4becbf8c570..cab0abbd1ebe 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -20,53 +20,6 @@ | |||
20 | #include <asm/tlb.h> | 20 | #include <asm/tlb.h> |
21 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
22 | 22 | ||
23 | void show_mem(void) | ||
24 | { | ||
25 | int total = 0, reserved = 0; | ||
26 | int shared = 0, cached = 0; | ||
27 | int highmem = 0; | ||
28 | struct page *page; | ||
29 | pg_data_t *pgdat; | ||
30 | unsigned long i; | ||
31 | unsigned long flags; | ||
32 | |||
33 | printk(KERN_INFO "Mem-info:\n"); | ||
34 | show_free_areas(); | ||
35 | for_each_online_pgdat(pgdat) { | ||
36 | pgdat_resize_lock(pgdat, &flags); | ||
37 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
38 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
39 | touch_nmi_watchdog(); | ||
40 | page = pgdat_page_nr(pgdat, i); | ||
41 | total++; | ||
42 | if (PageHighMem(page)) | ||
43 | highmem++; | ||
44 | if (PageReserved(page)) | ||
45 | reserved++; | ||
46 | else if (PageSwapCache(page)) | ||
47 | cached++; | ||
48 | else if (page_count(page)) | ||
49 | shared += page_count(page) - 1; | ||
50 | } | ||
51 | pgdat_resize_unlock(pgdat, &flags); | ||
52 | } | ||
53 | printk(KERN_INFO "%d pages of RAM\n", total); | ||
54 | printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); | ||
55 | printk(KERN_INFO "%d reserved pages\n", reserved); | ||
56 | printk(KERN_INFO "%d pages shared\n", shared); | ||
57 | printk(KERN_INFO "%d pages swap cached\n", cached); | ||
58 | |||
59 | printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); | ||
60 | printk(KERN_INFO "%lu pages writeback\n", | ||
61 | global_page_state(NR_WRITEBACK)); | ||
62 | printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); | ||
63 | printk(KERN_INFO "%lu pages slab\n", | ||
64 | global_page_state(NR_SLAB_RECLAIMABLE) + | ||
65 | global_page_state(NR_SLAB_UNRECLAIMABLE)); | ||
66 | printk(KERN_INFO "%lu pages pagetables\n", | ||
67 | global_page_state(NR_PAGETABLE)); | ||
68 | } | ||
69 | |||
70 | /* | 23 | /* |
71 | * Associate a virtual page frame with a given physical page frame | 24 | * Associate a virtual page frame with a given physical page frame |
72 | * and protection flags for that frame. | 25 | * and protection flags for that frame. |