aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-08-14 06:19:59 -0400
committerIngo Molnar <mingo@elte.hu>2008-08-14 06:19:59 -0400
commit8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch)
tree8129b5907161bc6ae26deb3645ce1e280c5e1f51 /arch/x86/mm
parentb2139aa0eec330c711c5a279db361e5ef1178e78 (diff)
parent30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff)
Merge commit 'v2.6.27-rc3' into x86/prototypes
Conflicts: include/asm-x86/dma-mapping.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/discontig_32.c3
-rw-r--r--arch/x86/mm/dump_pagetables.c10
-rw-r--r--arch/x86/mm/gup.c298
-rw-r--r--arch/x86/mm/hugetlbpage.c78
-rw-r--r--arch/x86/mm/init_64.c37
-rw-r--r--arch/x86/mm/ioremap.c8
-rw-r--r--arch/x86/mm/numa_64.c4
-rw-r--r--arch/x86/mm/pgtable.c3
-rw-r--r--arch/x86/mm/pgtable_32.c47
10 files changed, 382 insertions, 108 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 1fbb844c3d7a..dfb932dcf136 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o 2 pat.o pgtable.o gup.o
3 3
4obj-$(CONFIG_X86_32) += pgtable_32.o 4obj-$(CONFIG_X86_32) += pgtable_32.o
5 5
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 5dfef9fa061a..62fa440678d8 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -42,7 +42,6 @@
42 42
43struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 43struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
44EXPORT_SYMBOL(node_data); 44EXPORT_SYMBOL(node_data);
45static bootmem_data_t node0_bdata;
46 45
47/* 46/*
48 * numa interface - we expect the numa architecture specific code to have 47 * numa interface - we expect the numa architecture specific code to have
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn,
385 for_each_online_node(nid) 384 for_each_online_node(nid)
386 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 385 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
387 386
388 NODE_DATA(0)->bdata = &node0_bdata; 387 NODE_DATA(0)->bdata = &bootmem_node_data[0];
389 setup_bootmem_allocator(); 388 setup_bootmem_allocator();
390} 389}
391 390
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0bb0caed8971..a20d1fa64b4e 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
148 * we have now. "break" is either changing perms, levels or 148 * we have now. "break" is either changing perms, levels or
149 * address space marker. 149 * address space marker.
150 */ 150 */
151 prot = pgprot_val(new_prot) & ~(PTE_MASK); 151 prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
152 cur = pgprot_val(st->current_prot) & ~(PTE_MASK); 152 cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
153 153
154 if (!st->level) { 154 if (!st->level) {
155 /* First entry */ 155 /* First entry */
@@ -221,7 +221,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
221 for (i = 0; i < PTRS_PER_PMD; i++) { 221 for (i = 0; i < PTRS_PER_PMD; i++) {
222 st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); 222 st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
223 if (!pmd_none(*start)) { 223 if (!pmd_none(*start)) {
224 pgprotval_t prot = pmd_val(*start) & ~PTE_MASK; 224 pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
225 225
226 if (pmd_large(*start) || !pmd_present(*start)) 226 if (pmd_large(*start) || !pmd_present(*start))
227 note_page(m, st, __pgprot(prot), 3); 227 note_page(m, st, __pgprot(prot), 3);
@@ -253,7 +253,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
253 for (i = 0; i < PTRS_PER_PUD; i++) { 253 for (i = 0; i < PTRS_PER_PUD; i++) {
254 st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); 254 st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
255 if (!pud_none(*start)) { 255 if (!pud_none(*start)) {
256 pgprotval_t prot = pud_val(*start) & ~PTE_MASK; 256 pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
257 257
258 if (pud_large(*start) || !pud_present(*start)) 258 if (pud_large(*start) || !pud_present(*start))
259 note_page(m, st, __pgprot(prot), 2); 259 note_page(m, st, __pgprot(prot), 2);
@@ -288,7 +288,7 @@ static void walk_pgd_level(struct seq_file *m)
288 for (i = 0; i < PTRS_PER_PGD; i++) { 288 for (i = 0; i < PTRS_PER_PGD; i++) {
289 st.current_address = normalize_addr(i * PGD_LEVEL_MULT); 289 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
290 if (!pgd_none(*start)) { 290 if (!pgd_none(*start)) {
291 pgprotval_t prot = pgd_val(*start) & ~PTE_MASK; 291 pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
292 292
293 if (pgd_large(*start) || !pgd_present(*start)) 293 if (pgd_large(*start) || !pgd_present(*start))
294 note_page(m, &st, __pgprot(prot), 1); 294 note_page(m, &st, __pgprot(prot), 1);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
new file mode 100644
index 000000000000..007bb06c7504
--- /dev/null
+++ b/arch/x86/mm/gup.c
@@ -0,0 +1,298 @@
1/*
2 * Lockless get_user_pages_fast for x86
3 *
4 * Copyright (C) 2008 Nick Piggin
5 * Copyright (C) 2008 Novell Inc.
6 */
7#include <linux/sched.h>
8#include <linux/mm.h>
9#include <linux/vmstat.h>
10#include <linux/highmem.h>
11
12#include <asm/pgtable.h>
13
14static inline pte_t gup_get_pte(pte_t *ptep)
15{
16#ifndef CONFIG_X86_PAE
17 return *ptep;
18#else
19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking
21 * any locks. For this we would like to load the pointers atoimcally,
22 * but that is not possible (without expensive cmpxchg8b) on PAE. What
23 * we do have is the guarantee that a pte will only either go from not
24 * present to present, or present to not present or both -- it will not
25 * switch to a completely different present page without a TLB flush in
26 * between; something that we are blocking by holding interrupts off.
27 *
28 * Setting ptes from not present to present goes:
29 * ptep->pte_high = h;
30 * smp_wmb();
31 * ptep->pte_low = l;
32 *
33 * And present to not present goes:
34 * ptep->pte_low = 0;
35 * smp_wmb();
36 * ptep->pte_high = 0;
37 *
38 * We must ensure here that the load of pte_low sees l iff pte_high
39 * sees h. We load pte_high *after* loading pte_low, which ensures we
40 * don't see an older value of pte_high. *Then* we recheck pte_low,
41 * which ensures that we haven't picked up a changed pte high. We might
42 * have got rubbish values from pte_low and pte_high, but we are
43 * guaranteed that pte_low will not have the present bit set *unless*
44 * it is 'l'. And get_user_pages_fast only operates on present ptes, so
45 * we're safe.
46 *
47 * gup_get_pte should not be used or copied outside gup.c without being
48 * very careful -- it does not atomically load the pte or anything that
49 * is likely to be useful for you.
50 */
51 pte_t pte;
52
53retry:
54 pte.pte_low = ptep->pte_low;
55 smp_rmb();
56 pte.pte_high = ptep->pte_high;
57 smp_rmb();
58 if (unlikely(pte.pte_low != ptep->pte_low))
59 goto retry;
60
61 return pte;
62#endif
63}
64
65/*
66 * The performance critical leaf functions are made noinline otherwise gcc
67 * inlines everything into a single function which results in too much
68 * register pressure.
69 */
70static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
71 unsigned long end, int write, struct page **pages, int *nr)
72{
73 unsigned long mask;
74 pte_t *ptep;
75
76 mask = _PAGE_PRESENT|_PAGE_USER;
77 if (write)
78 mask |= _PAGE_RW;
79
80 ptep = pte_offset_map(&pmd, addr);
81 do {
82 pte_t pte = gup_get_pte(ptep);
83 struct page *page;
84
85 if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) {
86 pte_unmap(ptep);
87 return 0;
88 }
89 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
90 page = pte_page(pte);
91 get_page(page);
92 pages[*nr] = page;
93 (*nr)++;
94
95 } while (ptep++, addr += PAGE_SIZE, addr != end);
96 pte_unmap(ptep - 1);
97
98 return 1;
99}
100
101static inline void get_head_page_multiple(struct page *page, int nr)
102{
103 VM_BUG_ON(page != compound_head(page));
104 VM_BUG_ON(page_count(page) == 0);
105 atomic_add(nr, &page->_count);
106}
107
108static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
109 unsigned long end, int write, struct page **pages, int *nr)
110{
111 unsigned long mask;
112 pte_t pte = *(pte_t *)&pmd;
113 struct page *head, *page;
114 int refs;
115
116 mask = _PAGE_PRESENT|_PAGE_USER;
117 if (write)
118 mask |= _PAGE_RW;
119 if ((pte_val(pte) & mask) != mask)
120 return 0;
121 /* hugepages are never "special" */
122 VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
123 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
124
125 refs = 0;
126 head = pte_page(pte);
127 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
128 do {
129 VM_BUG_ON(compound_head(page) != head);
130 pages[*nr] = page;
131 (*nr)++;
132 page++;
133 refs++;
134 } while (addr += PAGE_SIZE, addr != end);
135 get_head_page_multiple(head, refs);
136
137 return 1;
138}
139
140static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
141 int write, struct page **pages, int *nr)
142{
143 unsigned long next;
144 pmd_t *pmdp;
145
146 pmdp = pmd_offset(&pud, addr);
147 do {
148 pmd_t pmd = *pmdp;
149
150 next = pmd_addr_end(addr, end);
151 if (pmd_none(pmd))
152 return 0;
153 if (unlikely(pmd_large(pmd))) {
154 if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
155 return 0;
156 } else {
157 if (!gup_pte_range(pmd, addr, next, write, pages, nr))
158 return 0;
159 }
160 } while (pmdp++, addr = next, addr != end);
161
162 return 1;
163}
164
165static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
166 unsigned long end, int write, struct page **pages, int *nr)
167{
168 unsigned long mask;
169 pte_t pte = *(pte_t *)&pud;
170 struct page *head, *page;
171 int refs;
172
173 mask = _PAGE_PRESENT|_PAGE_USER;
174 if (write)
175 mask |= _PAGE_RW;
176 if ((pte_val(pte) & mask) != mask)
177 return 0;
178 /* hugepages are never "special" */
179 VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
180 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
181
182 refs = 0;
183 head = pte_page(pte);
184 page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
185 do {
186 VM_BUG_ON(compound_head(page) != head);
187 pages[*nr] = page;
188 (*nr)++;
189 page++;
190 refs++;
191 } while (addr += PAGE_SIZE, addr != end);
192 get_head_page_multiple(head, refs);
193
194 return 1;
195}
196
197static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
198 int write, struct page **pages, int *nr)
199{
200 unsigned long next;
201 pud_t *pudp;
202
203 pudp = pud_offset(&pgd, addr);
204 do {
205 pud_t pud = *pudp;
206
207 next = pud_addr_end(addr, end);
208 if (pud_none(pud))
209 return 0;
210 if (unlikely(pud_large(pud))) {
211 if (!gup_huge_pud(pud, addr, next, write, pages, nr))
212 return 0;
213 } else {
214 if (!gup_pmd_range(pud, addr, next, write, pages, nr))
215 return 0;
216 }
217 } while (pudp++, addr = next, addr != end);
218
219 return 1;
220}
221
222int get_user_pages_fast(unsigned long start, int nr_pages, int write,
223 struct page **pages)
224{
225 struct mm_struct *mm = current->mm;
226 unsigned long addr, len, end;
227 unsigned long next;
228 pgd_t *pgdp;
229 int nr = 0;
230
231 start &= PAGE_MASK;
232 addr = start;
233 len = (unsigned long) nr_pages << PAGE_SHIFT;
234 end = start + len;
235 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
236 start, len)))
237 goto slow_irqon;
238
239 /*
240 * XXX: batch / limit 'nr', to avoid large irq off latency
241 * needs some instrumenting to determine the common sizes used by
242 * important workloads (eg. DB2), and whether limiting the batch size
243 * will decrease performance.
244 *
245 * It seems like we're in the clear for the moment. Direct-IO is
246 * the main guy that batches up lots of get_user_pages, and even
247 * they are limited to 64-at-a-time which is not so many.
248 */
249 /*
250 * This doesn't prevent pagetable teardown, but does prevent
251 * the pagetables and pages from being freed on x86.
252 *
253 * So long as we atomically load page table pointers versus teardown
254 * (which we do on x86, with the above PAE exception), we can follow the
255 * address down to the the page and take a ref on it.
256 */
257 local_irq_disable();
258 pgdp = pgd_offset(mm, addr);
259 do {
260 pgd_t pgd = *pgdp;
261
262 next = pgd_addr_end(addr, end);
263 if (pgd_none(pgd))
264 goto slow;
265 if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
266 goto slow;
267 } while (pgdp++, addr = next, addr != end);
268 local_irq_enable();
269
270 VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
271 return nr;
272
273 {
274 int ret;
275
276slow:
277 local_irq_enable();
278slow_irqon:
279 /* Try to get the remaining pages with get_user_pages */
280 start += nr << PAGE_SHIFT;
281 pages += nr;
282
283 down_read(&mm->mmap_sem);
284 ret = get_user_pages(current, mm, start,
285 (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
286 up_read(&mm->mmap_sem);
287
288 /* Have to be a bit careful with return values */
289 if (nr > 0) {
290 if (ret < 0)
291 ret = nr;
292 else
293 ret += nr;
294 }
295
296 return ret;
297 }
298}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 0b3d567e686d..8f307d914c2e 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
124 return 1; 124 return 1;
125} 125}
126 126
127pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 127pte_t *huge_pte_alloc(struct mm_struct *mm,
128 unsigned long addr, unsigned long sz)
128{ 129{
129 pgd_t *pgd; 130 pgd_t *pgd;
130 pud_t *pud; 131 pud_t *pud;
@@ -133,9 +134,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
133 pgd = pgd_offset(mm, addr); 134 pgd = pgd_offset(mm, addr);
134 pud = pud_alloc(mm, pgd, addr); 135 pud = pud_alloc(mm, pgd, addr);
135 if (pud) { 136 if (pud) {
136 if (pud_none(*pud)) 137 if (sz == PUD_SIZE) {
137 huge_pmd_share(mm, addr, pud); 138 pte = (pte_t *)pud;
138 pte = (pte_t *) pmd_alloc(mm, pud, addr); 139 } else {
140 BUG_ON(sz != PMD_SIZE);
141 if (pud_none(*pud))
142 huge_pmd_share(mm, addr, pud);
143 pte = (pte_t *) pmd_alloc(mm, pud, addr);
144 }
139 } 145 }
140 BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); 146 BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
141 147
@@ -151,8 +157,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
151 pgd = pgd_offset(mm, addr); 157 pgd = pgd_offset(mm, addr);
152 if (pgd_present(*pgd)) { 158 if (pgd_present(*pgd)) {
153 pud = pud_offset(pgd, addr); 159 pud = pud_offset(pgd, addr);
154 if (pud_present(*pud)) 160 if (pud_present(*pud)) {
161 if (pud_large(*pud))
162 return (pte_t *)pud;
155 pmd = pmd_offset(pud, addr); 163 pmd = pmd_offset(pud, addr);
164 }
156 } 165 }
157 return (pte_t *) pmd; 166 return (pte_t *) pmd;
158} 167}
@@ -188,6 +197,11 @@ int pmd_huge(pmd_t pmd)
188 return 0; 197 return 0;
189} 198}
190 199
200int pud_huge(pud_t pud)
201{
202 return 0;
203}
204
191struct page * 205struct page *
192follow_huge_pmd(struct mm_struct *mm, unsigned long address, 206follow_huge_pmd(struct mm_struct *mm, unsigned long address,
193 pmd_t *pmd, int write) 207 pmd_t *pmd, int write)
@@ -208,6 +222,11 @@ int pmd_huge(pmd_t pmd)
208 return !!(pmd_val(pmd) & _PAGE_PSE); 222 return !!(pmd_val(pmd) & _PAGE_PSE);
209} 223}
210 224
225int pud_huge(pud_t pud)
226{
227 return !!(pud_val(pud) & _PAGE_PSE);
228}
229
211struct page * 230struct page *
212follow_huge_pmd(struct mm_struct *mm, unsigned long address, 231follow_huge_pmd(struct mm_struct *mm, unsigned long address,
213 pmd_t *pmd, int write) 232 pmd_t *pmd, int write)
@@ -216,9 +235,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
216 235
217 page = pte_page(*(pte_t *)pmd); 236 page = pte_page(*(pte_t *)pmd);
218 if (page) 237 if (page)
219 page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); 238 page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
239 return page;
240}
241
242struct page *
243follow_huge_pud(struct mm_struct *mm, unsigned long address,
244 pud_t *pud, int write)
245{
246 struct page *page;
247
248 page = pte_page(*(pte_t *)pud);
249 if (page)
250 page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
220 return page; 251 return page;
221} 252}
253
222#endif 254#endif
223 255
224/* x86_64 also uses this file */ 256/* x86_64 also uses this file */
@@ -228,6 +260,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
228 unsigned long addr, unsigned long len, 260 unsigned long addr, unsigned long len,
229 unsigned long pgoff, unsigned long flags) 261 unsigned long pgoff, unsigned long flags)
230{ 262{
263 struct hstate *h = hstate_file(file);
231 struct mm_struct *mm = current->mm; 264 struct mm_struct *mm = current->mm;
232 struct vm_area_struct *vma; 265 struct vm_area_struct *vma;
233 unsigned long start_addr; 266 unsigned long start_addr;
@@ -240,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
240 } 273 }
241 274
242full_search: 275full_search:
243 addr = ALIGN(start_addr, HPAGE_SIZE); 276 addr = ALIGN(start_addr, huge_page_size(h));
244 277
245 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 278 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
246 /* At this point: (!vma || addr < vma->vm_end). */ 279 /* At this point: (!vma || addr < vma->vm_end). */
@@ -262,7 +295,7 @@ full_search:
262 } 295 }
263 if (addr + mm->cached_hole_size < vma->vm_start) 296 if (addr + mm->cached_hole_size < vma->vm_start)
264 mm->cached_hole_size = vma->vm_start - addr; 297 mm->cached_hole_size = vma->vm_start - addr;
265 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 298 addr = ALIGN(vma->vm_end, huge_page_size(h));
266 } 299 }
267} 300}
268 301
@@ -270,6 +303,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
270 unsigned long addr0, unsigned long len, 303 unsigned long addr0, unsigned long len,
271 unsigned long pgoff, unsigned long flags) 304 unsigned long pgoff, unsigned long flags)
272{ 305{
306 struct hstate *h = hstate_file(file);
273 struct mm_struct *mm = current->mm; 307 struct mm_struct *mm = current->mm;
274 struct vm_area_struct *vma, *prev_vma; 308 struct vm_area_struct *vma, *prev_vma;
275 unsigned long base = mm->mmap_base, addr = addr0; 309 unsigned long base = mm->mmap_base, addr = addr0;
@@ -290,7 +324,7 @@ try_again:
290 goto fail; 324 goto fail;
291 325
292 /* either no address requested or cant fit in requested address hole */ 326 /* either no address requested or cant fit in requested address hole */
293 addr = (mm->free_area_cache - len) & HPAGE_MASK; 327 addr = (mm->free_area_cache - len) & huge_page_mask(h);
294 do { 328 do {
295 /* 329 /*
296 * Lookup failure means no vma is above this address, 330 * Lookup failure means no vma is above this address,
@@ -321,7 +355,7 @@ try_again:
321 largest_hole = vma->vm_start - addr; 355 largest_hole = vma->vm_start - addr;
322 356
323 /* try just below the current vma->vm_start */ 357 /* try just below the current vma->vm_start */
324 addr = (vma->vm_start - len) & HPAGE_MASK; 358 addr = (vma->vm_start - len) & huge_page_mask(h);
325 } while (len <= vma->vm_start); 359 } while (len <= vma->vm_start);
326 360
327fail: 361fail:
@@ -359,22 +393,23 @@ unsigned long
359hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 393hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
360 unsigned long len, unsigned long pgoff, unsigned long flags) 394 unsigned long len, unsigned long pgoff, unsigned long flags)
361{ 395{
396 struct hstate *h = hstate_file(file);
362 struct mm_struct *mm = current->mm; 397 struct mm_struct *mm = current->mm;
363 struct vm_area_struct *vma; 398 struct vm_area_struct *vma;
364 399
365 if (len & ~HPAGE_MASK) 400 if (len & ~huge_page_mask(h))
366 return -EINVAL; 401 return -EINVAL;
367 if (len > TASK_SIZE) 402 if (len > TASK_SIZE)
368 return -ENOMEM; 403 return -ENOMEM;
369 404
370 if (flags & MAP_FIXED) { 405 if (flags & MAP_FIXED) {
371 if (prepare_hugepage_range(addr, len)) 406 if (prepare_hugepage_range(file, addr, len))
372 return -EINVAL; 407 return -EINVAL;
373 return addr; 408 return addr;
374 } 409 }
375 410
376 if (addr) { 411 if (addr) {
377 addr = ALIGN(addr, HPAGE_SIZE); 412 addr = ALIGN(addr, huge_page_size(h));
378 vma = find_vma(mm, addr); 413 vma = find_vma(mm, addr);
379 if (TASK_SIZE - len >= addr && 414 if (TASK_SIZE - len >= addr &&
380 (!vma || addr + len <= vma->vm_start)) 415 (!vma || addr + len <= vma->vm_start))
@@ -390,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
390 425
391#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ 426#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
392 427
428#ifdef CONFIG_X86_64
429static __init int setup_hugepagesz(char *opt)
430{
431 unsigned long ps = memparse(opt, &opt);
432 if (ps == PMD_SIZE) {
433 hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
434 } else if (ps == PUD_SIZE && cpu_has_gbpages) {
435 hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
436 } else {
437 printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
438 ps >> 20);
439 return 0;
440 }
441 return 1;
442}
443__setup("hugepagesz=", setup_hugepagesz);
444#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ec37121f6709..129618ca0ea2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -86,43 +86,6 @@ early_param("gbpages", parse_direct_gbpages_on);
86 * around without checking the pgd every time. 86 * around without checking the pgd every time.
87 */ 87 */
88 88
89void show_mem(void)
90{
91 long i, total = 0, reserved = 0;
92 long shared = 0, cached = 0;
93 struct page *page;
94 pg_data_t *pgdat;
95
96 printk(KERN_INFO "Mem-info:\n");
97 show_free_areas();
98 for_each_online_pgdat(pgdat) {
99 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
100 /*
101 * This loop can take a while with 256 GB and
102 * 4k pages so defer the NMI watchdog:
103 */
104 if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
105 touch_nmi_watchdog();
106
107 if (!pfn_valid(pgdat->node_start_pfn + i))
108 continue;
109
110 page = pfn_to_page(pgdat->node_start_pfn + i);
111 total++;
112 if (PageReserved(page))
113 reserved++;
114 else if (PageSwapCache(page))
115 cached++;
116 else if (page_count(page))
117 shared += page_count(page) - 1;
118 }
119 }
120 printk(KERN_INFO "%lu pages of RAM\n", total);
121 printk(KERN_INFO "%lu reserved pages\n", reserved);
122 printk(KERN_INFO "%lu pages shared\n", shared);
123 printk(KERN_INFO "%lu pages swap cached\n", cached);
124}
125
126int after_bootmem; 89int after_bootmem;
127 90
128static __init void *spp_getpage(void) 91static __init void *spp_getpage(void)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 19fd9a3c5210..fba57be9af68 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
330 return (void __iomem *)ret; 330 return (void __iomem *)ret;
331} 331}
332 332
333void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
334 unsigned long prot_val)
335{
336 return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
337 __builtin_return_address(0));
338}
339EXPORT_SYMBOL(ioremap_prot);
340
333/** 341/**
334 * iounmap - Free a IO remapping 342 * iounmap - Free a IO remapping
335 * @addr: virtual address from ioremap_* 343 * @addr: virtual address from ioremap_*
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9782f42dd319..a4dd793d6003 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -23,8 +23,6 @@
23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
24EXPORT_SYMBOL(node_data); 24EXPORT_SYMBOL(node_data);
25 25
26static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
27
28struct memnode memnode; 26struct memnode memnode;
29 27
30s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 28s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
@@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
198 nodedata_phys + pgdat_size - 1); 196 nodedata_phys + pgdat_size - 1);
199 197
200 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 198 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
201 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; 199 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
202 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 200 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
203 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; 201 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
204 202
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 557b2abceef8..d50302774fe2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
207 unsigned long addr; 207 unsigned long addr;
208 int i; 208 int i;
209 209
210 if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
211 return;
212
210 pud = pud_offset(pgd, 0); 213 pud = pud_offset(pgd, 0);
211 214
212 for (addr = i = 0; i < PREALLOCATED_PMDS; 215 for (addr = i = 0; i < PREALLOCATED_PMDS;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index b4becbf8c570..cab0abbd1ebe 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -20,53 +20,6 @@
20#include <asm/tlb.h> 20#include <asm/tlb.h>
21#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
22 22
23void show_mem(void)
24{
25 int total = 0, reserved = 0;
26 int shared = 0, cached = 0;
27 int highmem = 0;
28 struct page *page;
29 pg_data_t *pgdat;
30 unsigned long i;
31 unsigned long flags;
32
33 printk(KERN_INFO "Mem-info:\n");
34 show_free_areas();
35 for_each_online_pgdat(pgdat) {
36 pgdat_resize_lock(pgdat, &flags);
37 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
38 if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
39 touch_nmi_watchdog();
40 page = pgdat_page_nr(pgdat, i);
41 total++;
42 if (PageHighMem(page))
43 highmem++;
44 if (PageReserved(page))
45 reserved++;
46 else if (PageSwapCache(page))
47 cached++;
48 else if (page_count(page))
49 shared += page_count(page) - 1;
50 }
51 pgdat_resize_unlock(pgdat, &flags);
52 }
53 printk(KERN_INFO "%d pages of RAM\n", total);
54 printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
55 printk(KERN_INFO "%d reserved pages\n", reserved);
56 printk(KERN_INFO "%d pages shared\n", shared);
57 printk(KERN_INFO "%d pages swap cached\n", cached);
58
59 printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
60 printk(KERN_INFO "%lu pages writeback\n",
61 global_page_state(NR_WRITEBACK));
62 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
63 printk(KERN_INFO "%lu pages slab\n",
64 global_page_state(NR_SLAB_RECLAIMABLE) +
65 global_page_state(NR_SLAB_UNRECLAIMABLE));
66 printk(KERN_INFO "%lu pages pagetables\n",
67 global_page_state(NR_PAGETABLE));
68}
69
70/* 23/*
71 * Associate a virtual page frame with a given physical page frame 24 * Associate a virtual page frame with a given physical page frame
72 * and protection flags for that frame. 25 * and protection flags for that frame.