Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/Makefile        |   2
-rw-r--r--  arch/x86/mm/discontig_32.c  |   3
-rw-r--r--  arch/x86/mm/gup.c           | 298
-rw-r--r--  arch/x86/mm/hugetlbpage.c   |  78
-rw-r--r--  arch/x86/mm/init_32.c       |   4
-rw-r--r--  arch/x86/mm/init_64.c       |  85
-rw-r--r--  arch/x86/mm/ioremap.c       |  18
-rw-r--r--  arch/x86/mm/mmio-mod.c      |   4
-rw-r--r--  arch/x86/mm/numa_64.c       |   4
-rw-r--r--  arch/x86/mm/pageattr-test.c |   3
-rw-r--r--  arch/x86/mm/pageattr.c      |  27
-rw-r--r--  arch/x86/mm/pat.c           |  50
-rw-r--r--  arch/x86/mm/pgtable.c       |   3
-rw-r--r--  arch/x86/mm/pgtable_32.c    |  47
-rw-r--r--  arch/x86/mm/srat_32.c       |  12
15 files changed, 475 insertions(+), 163 deletions(-)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 1fbb844c3d7a..dfb932dcf136 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
 obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-        pat.o pgtable.o
+        pat.o pgtable.o gup.o
 
 obj-$(CONFIG_X86_32) += pgtable_32.o
 
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 5dfef9fa061a..62fa440678d8 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -42,7 +42,6 @@
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
-static bootmem_data_t node0_bdata;
 
 /*
  * numa interface - we expect the numa architecture specific code to have
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn,
        for_each_online_node(nid)
                memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-       NODE_DATA(0)->bdata = &node0_bdata;
+       NODE_DATA(0)->bdata = &bootmem_node_data[0];
        setup_bootmem_allocator();
 }
 
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
new file mode 100644
index 000000000000..007bb06c7504
--- /dev/null
+++ b/arch/x86/mm/gup.c
@@ -0,0 +1,298 @@
+/*
+ * Lockless get_user_pages_fast for x86
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/highmem.h>
+
+#include <asm/pgtable.h>
+
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+#ifndef CONFIG_X86_PAE
+       return *ptep;
+#else
+       /*
+        * With get_user_pages_fast, we walk down the pagetables without taking
+        * any locks. For this we would like to load the pointers atomically,
+        * but that is not possible (without expensive cmpxchg8b) on PAE. What
+        * we do have is the guarantee that a pte will only either go from not
+        * present to present, or present to not present or both -- it will not
+        * switch to a completely different present page without a TLB flush in
+        * between; something that we are blocking by holding interrupts off.
+        *
+        * Setting ptes from not present to present goes:
+        * ptep->pte_high = h;
+        * smp_wmb();
+        * ptep->pte_low = l;
+        *
+        * And present to not present goes:
+        * ptep->pte_low = 0;
+        * smp_wmb();
+        * ptep->pte_high = 0;
+        *
+        * We must ensure here that the load of pte_low sees l iff pte_high
+        * sees h. We load pte_high *after* loading pte_low, which ensures we
+        * don't see an older value of pte_high. *Then* we recheck pte_low,
+        * which ensures that we haven't picked up a changed pte high. We might
+        * have got rubbish values from pte_low and pte_high, but we are
+        * guaranteed that pte_low will not have the present bit set *unless*
+        * it is 'l'. And get_user_pages_fast only operates on present ptes, so
+        * we're safe.
+        *
+        * gup_get_pte should not be used or copied outside gup.c without being
+        * very careful -- it does not atomically load the pte or anything that
+        * is likely to be useful for you.
+        */
+       pte_t pte;
+
+retry:
+       pte.pte_low = ptep->pte_low;
+       smp_rmb();
+       pte.pte_high = ptep->pte_high;
+       smp_rmb();
+       if (unlikely(pte.pte_low != ptep->pte_low))
+               goto retry;
+
+       return pte;
+#endif
+}
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+{
+       unsigned long mask;
+       pte_t *ptep;
+
+       mask = _PAGE_PRESENT|_PAGE_USER;
+       if (write)
+               mask |= _PAGE_RW;
+
+       ptep = pte_offset_map(&pmd, addr);
+       do {
+               pte_t pte = gup_get_pte(ptep);
+               struct page *page;
+
+               if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+                       pte_unmap(ptep);
+                       return 0;
+               }
+               VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+               page = pte_page(pte);
+               get_page(page);
+               pages[*nr] = page;
+               (*nr)++;
+
+       } while (ptep++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(ptep - 1);
+
+       return 1;
+}
+
+static inline void get_head_page_multiple(struct page *page, int nr)
+{
+       VM_BUG_ON(page != compound_head(page));
+       VM_BUG_ON(page_count(page) == 0);
+       atomic_add(nr, &page->_count);
+}
+
+static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+{
+       unsigned long mask;
+       pte_t pte = *(pte_t *)&pmd;
+       struct page *head, *page;
+       int refs;
+
+       mask = _PAGE_PRESENT|_PAGE_USER;
+       if (write)
+               mask |= _PAGE_RW;
+       if ((pte_val(pte) & mask) != mask)
+               return 0;
+       /* hugepages are never "special" */
+       VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
+       VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+       refs = 0;
+       head = pte_page(pte);
+       page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+       do {
+               VM_BUG_ON(compound_head(page) != head);
+               pages[*nr] = page;
+               (*nr)++;
+               page++;
+               refs++;
+       } while (addr += PAGE_SIZE, addr != end);
+       get_head_page_multiple(head, refs);
+
+       return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+               int write, struct page **pages, int *nr)
+{
+       unsigned long next;
+       pmd_t *pmdp;
+
+       pmdp = pmd_offset(&pud, addr);
+       do {
+               pmd_t pmd = *pmdp;
+
+               next = pmd_addr_end(addr, end);
+               if (pmd_none(pmd))
+                       return 0;
+               if (unlikely(pmd_large(pmd))) {
+                       if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
+                               return 0;
+               } else {
+                       if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+                               return 0;
+               }
+       } while (pmdp++, addr = next, addr != end);
+
+       return 1;
+}
+
+static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+{
+       unsigned long mask;
+       pte_t pte = *(pte_t *)&pud;
+       struct page *head, *page;
+       int refs;
+
+       mask = _PAGE_PRESENT|_PAGE_USER;
+       if (write)
+               mask |= _PAGE_RW;
+       if ((pte_val(pte) & mask) != mask)
+               return 0;
+       /* hugepages are never "special" */
+       VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
+       VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+       refs = 0;
+       head = pte_page(pte);
+       page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+       do {
+               VM_BUG_ON(compound_head(page) != head);
+               pages[*nr] = page;
+               (*nr)++;
+               page++;
+               refs++;
+       } while (addr += PAGE_SIZE, addr != end);
+       get_head_page_multiple(head, refs);
+
+       return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+                       int write, struct page **pages, int *nr)
+{
+       unsigned long next;
+       pud_t *pudp;
+
+       pudp = pud_offset(&pgd, addr);
+       do {
+               pud_t pud = *pudp;
+
+               next = pud_addr_end(addr, end);
+               if (pud_none(pud))
+                       return 0;
+               if (unlikely(pud_large(pud))) {
+                       if (!gup_huge_pud(pud, addr, next, write, pages, nr))
+                               return 0;
+               } else {
+                       if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+                               return 0;
+               }
+       } while (pudp++, addr = next, addr != end);
+
+       return 1;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                       struct page **pages)
+{
+       struct mm_struct *mm = current->mm;
+       unsigned long addr, len, end;
+       unsigned long next;
+       pgd_t *pgdp;
+       int nr = 0;
+
+       start &= PAGE_MASK;
+       addr = start;
+       len = (unsigned long) nr_pages << PAGE_SHIFT;
+       end = start + len;
+       if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+                                       start, len)))
+               goto slow_irqon;
+
+       /*
+        * XXX: batch / limit 'nr', to avoid large irq off latency
+        * needs some instrumenting to determine the common sizes used by
+        * important workloads (eg. DB2), and whether limiting the batch size
+        * will decrease performance.
+        *
+        * It seems like we're in the clear for the moment. Direct-IO is
+        * the main guy that batches up lots of get_user_pages, and even
+        * they are limited to 64-at-a-time which is not so many.
+        */
+       /*
+        * This doesn't prevent pagetable teardown, but does prevent
+        * the pagetables and pages from being freed on x86.
+        *
+        * So long as we atomically load page table pointers versus teardown
+        * (which we do on x86, with the above PAE exception), we can follow the
+        * address down to the page and take a ref on it.
+        */
+       local_irq_disable();
+       pgdp = pgd_offset(mm, addr);
+       do {
+               pgd_t pgd = *pgdp;
+
+               next = pgd_addr_end(addr, end);
+               if (pgd_none(pgd))
+                       goto slow;
+               if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+                       goto slow;
+       } while (pgdp++, addr = next, addr != end);
+       local_irq_enable();
+
+       VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+       return nr;
+
+       {
+               int ret;
+
+slow:
+               local_irq_enable();
+slow_irqon:
+               /* Try to get the remaining pages with get_user_pages */
+               start += nr << PAGE_SHIFT;
+               pages += nr;
+
+               down_read(&mm->mmap_sem);
+               ret = get_user_pages(current, mm, start,
+                       (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+               up_read(&mm->mmap_sem);
+
+               /* Have to be a bit careful with return values */
+               if (nr > 0) {
+                       if (ret < 0)
+                               ret = nr;
+                       else
+                               ret += nr;
+               }
+
+               return ret;
+       }
+}
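
get_user_pages_fast() above is the whole public surface of this file: it pins nr_pages user pages starting at start, walking the page tables with interrupts off and punting to the classic get_user_pages() slow path on any snag. A minimal caller sketch, assuming nothing beyond the signature added in this patch; pin_user_buffer() and its all-or-nothing policy are hypothetical, not part of the patch:

#include <linux/mm.h>

/* Hypothetical helper, for illustration only: pin a whole user buffer or
 * pin nothing. get_user_pages_fast() may pin fewer pages than requested,
 * so a caller must release any partial pin itself. */
static int pin_user_buffer(unsigned long uaddr, int nr_pages,
                           struct page **pages)
{
        /* write=1: the kernel intends to write into these pages */
        int got = get_user_pages_fast(uaddr, nr_pages, 1, pages);

        if (got == nr_pages)
                return 0;
        while (got > 0)                 /* partial pin: undo and fail */
                put_page(pages[--got]);
        return -EFAULT;
}

The return value is the number of pages pinned, which can be fewer than requested when the irq-off walk hits a non-present, non-user, or (for a write) read-only pte and the slow path cannot finish the job.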
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 0b3d567e686d..8f307d914c2e 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
        return 1;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+                       unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -133,9 +134,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (pud) {
-               if (pud_none(*pud))
-                       huge_pmd_share(mm, addr, pud);
-               pte = (pte_t *) pmd_alloc(mm, pud, addr);
+               if (sz == PUD_SIZE) {
+                       pte = (pte_t *)pud;
+               } else {
+                       BUG_ON(sz != PMD_SIZE);
+                       if (pud_none(*pud))
+                               huge_pmd_share(mm, addr, pud);
+                       pte = (pte_t *) pmd_alloc(mm, pud, addr);
+               }
        }
        BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
@@ -151,8 +157,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
        pgd = pgd_offset(mm, addr);
        if (pgd_present(*pgd)) {
                pud = pud_offset(pgd, addr);
-               if (pud_present(*pud))
+               if (pud_present(*pud)) {
+                       if (pud_large(*pud))
+                               return (pte_t *)pud;
                        pmd = pmd_offset(pud, addr);
+               }
        }
        return (pte_t *) pmd;
 }
@@ -188,6 +197,11 @@ int pmd_huge(pmd_t pmd)
        return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+       return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
@@ -208,6 +222,11 @@ int pmd_huge(pmd_t pmd)
        return !!(pmd_val(pmd) & _PAGE_PSE);
 }
 
+int pud_huge(pud_t pud)
+{
+       return !!(pud_val(pud) & _PAGE_PSE);
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
@@ -216,9 +235,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 
        page = pte_page(*(pte_t *)pmd);
        if (page)
-               page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+               page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+       return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+       pud_t *pud, int write)
+{
+       struct page *page;
+
+       page = pte_page(*(pte_t *)pud);
+       if (page)
+               page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
        return page;
 }
+
 #endif
 
 /* x86_64 also uses this file */
@@ -228,6 +260,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
 {
+       struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;
@@ -240,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
        }
 
 full_search:
-       addr = ALIGN(start_addr, HPAGE_SIZE);
+       addr = ALIGN(start_addr, huge_page_size(h));
 
        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
@@ -262,7 +295,7 @@ full_search:
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;
-               addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+               addr = ALIGN(vma->vm_end, huge_page_size(h));
        }
 }
 
@@ -270,6 +303,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
                unsigned long addr0, unsigned long len,
                unsigned long pgoff, unsigned long flags)
 {
+       struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev_vma;
        unsigned long base = mm->mmap_base, addr = addr0;
@@ -290,7 +324,7 @@ try_again:
                goto fail;
 
        /* either no address requested or can't fit in requested address hole */
-       addr = (mm->free_area_cache - len) & HPAGE_MASK;
+       addr = (mm->free_area_cache - len) & huge_page_mask(h);
        do {
                /*
                 * Lookup failure means no vma is above this address,
@@ -321,7 +355,7 @@ try_again:
                        largest_hole = vma->vm_start - addr;
 
                /* try just below the current vma->vm_start */
-               addr = (vma->vm_start - len) & HPAGE_MASK;
+               addr = (vma->vm_start - len) & huge_page_mask(h);
        } while (len <= vma->vm_start);
 
 fail:
@@ -359,22 +393,23 @@ unsigned long
 hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
 {
+       struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
 
-       if (len & ~HPAGE_MASK)
+       if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > TASK_SIZE)
                return -ENOMEM;
 
        if (flags & MAP_FIXED) {
-               if (prepare_hugepage_range(addr, len))
+               if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }
 
        if (addr) {
-               addr = ALIGN(addr, HPAGE_SIZE);
+               addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
@@ -390,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
 
+#ifdef CONFIG_X86_64
+static __init int setup_hugepagesz(char *opt)
+{
+       unsigned long ps = memparse(opt, &opt);
+       if (ps == PMD_SIZE) {
+               hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+       } else if (ps == PUD_SIZE && cpu_has_gbpages) {
+               hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+       } else {
+               printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
+                       ps >> 20);
+               return 0;
+       }
+       return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+#endif
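
setup_hugepagesz() makes the supported huge page sizes a boot-time choice instead of the single compile-time HPAGE_SIZE. A hypothetical command line (counts illustrative; the 1G size needs a CPU with gbpages support, per the cpu_has_gbpages check above):

        hugepagesz=2M hugepages=512 hugepagesz=1G hugepages=4

Each accepted size registers an hstate through hugetlb_add_hstate(), which is what lets the hstate_file()/huge_page_size(h) calls earlier in this file replace the old HPAGE_SIZE/HPAGE_MASK constants.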
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d37f29376b0c..60ec1d08ff24 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -458,11 +458,7 @@ static void __init pagetable_init(void)
 {
        pgd_t *pgd_base = swapper_pg_dir;
 
-       paravirt_pagetable_setup_start(pgd_base);
-
        permanent_kmaps_init(pgd_base);
-
-       paravirt_pagetable_setup_done(pgd_base);
 }
 
 #ifdef CONFIG_ACPI_SLEEP
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ec37121f6709..d3746efb060d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-int direct_gbpages __meminitdata
+int direct_gbpages
 #ifdef CONFIG_DIRECT_GBPAGES
                                = 1
 #endif
@@ -86,46 +86,13 @@ early_param("gbpages", parse_direct_gbpages_on);
  * around without checking the pgd every time.
  */
 
-void show_mem(void)
-{
-       long i, total = 0, reserved = 0;
-       long shared = 0, cached = 0;
-       struct page *page;
-       pg_data_t *pgdat;
-
-       printk(KERN_INFO "Mem-info:\n");
-       show_free_areas();
-       for_each_online_pgdat(pgdat) {
-               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-                       /*
-                        * This loop can take a while with 256 GB and
-                        * 4k pages so defer the NMI watchdog:
-                        */
-                       if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-                               touch_nmi_watchdog();
-
-                       if (!pfn_valid(pgdat->node_start_pfn + i))
-                               continue;
-
-                       page = pfn_to_page(pgdat->node_start_pfn + i);
-                       total++;
-                       if (PageReserved(page))
-                               reserved++;
-                       else if (PageSwapCache(page))
-                               cached++;
-                       else if (page_count(page))
-                               shared += page_count(page) - 1;
-               }
-       }
-       printk(KERN_INFO "%lu pages of RAM\n", total);
-       printk(KERN_INFO "%lu reserved pages\n", reserved);
-       printk(KERN_INFO "%lu pages shared\n", shared);
-       printk(KERN_INFO "%lu pages swap cached\n", cached);
-}
-
 int after_bootmem;
 
-static __init void *spp_getpage(void)
+/*
+ * NOTE: This function is marked __ref because it calls __init function
+ * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
+ */
+static __ref void *spp_getpage(void)
 {
        void *ptr;
 
@@ -274,7 +241,7 @@ static unsigned long __initdata table_start;
 static unsigned long __meminitdata table_end;
 static unsigned long __meminitdata table_top;
 
-static __meminit void *alloc_low_page(unsigned long *phys)
+static __ref void *alloc_low_page(unsigned long *phys)
 {
        unsigned long pfn = table_end++;
        void *adr;
@@ -295,7 +262,7 @@ static __meminit void *alloc_low_page(unsigned long *phys)
        return adr;
 }
 
-static __meminit void unmap_low_page(void *adr)
+static __ref void unmap_low_page(void *adr)
 {
        if (after_bootmem)
                return;
@@ -351,6 +318,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 {
        unsigned long pages = 0;
        unsigned long last_map_addr = end;
+       unsigned long start = address;
 
        int i = pmd_index(address);
 
@@ -368,16 +336,24 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
                }
 
                if (pmd_val(*pmd)) {
-                       if (!pmd_large(*pmd))
+                       if (!pmd_large(*pmd)) {
+                               spin_lock(&init_mm.page_table_lock);
                                last_map_addr = phys_pte_update(pmd, address,
                                                                end);
+                               spin_unlock(&init_mm.page_table_lock);
+                       }
+                       /* Count entries we're using from level2_ident_pgt */
+                       if (start == 0)
+                               pages++;
                        continue;
                }
 
                if (page_size_mask & (1<<PG_LEVEL_2M)) {
                        pages++;
+                       spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pmd,
                                pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+                       spin_unlock(&init_mm.page_table_lock);
                        last_map_addr = (address & PMD_MASK) + PMD_SIZE;
                        continue;
                }
@@ -386,7 +362,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
                last_map_addr = phys_pte_init(pte, address, end);
                unmap_low_page(pte);
 
+               spin_lock(&init_mm.page_table_lock);
                pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
+               spin_unlock(&init_mm.page_table_lock);
        }
        update_page_count(PG_LEVEL_2M, pages);
        return last_map_addr;
@@ -399,9 +377,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
        pmd_t *pmd = pmd_offset(pud, 0);
        unsigned long last_map_addr;
 
-       spin_lock(&init_mm.page_table_lock);
        last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
-       spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
        return last_map_addr;
 }
@@ -437,20 +413,21 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 
                if (page_size_mask & (1<<PG_LEVEL_1G)) {
                        pages++;
+                       spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pud,
                                pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+                       spin_unlock(&init_mm.page_table_lock);
                        last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
                        continue;
                }
 
                pmd = alloc_low_page(&pmd_phys);
-
-               spin_lock(&init_mm.page_table_lock);
                last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
                unmap_low_page(pmd);
+
+               spin_lock(&init_mm.page_table_lock);
                pud_populate(&init_mm, pud, __va(pmd_phys));
                spin_unlock(&init_mm.page_table_lock);
-
        }
        __flush_tlb_all();
        update_page_count(PG_LEVEL_1G, pages);
@@ -542,16 +519,14 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
                        continue;
                }
 
-               if (after_bootmem)
-                       pud = pud_offset(pgd, start & PGDIR_MASK);
-               else
-                       pud = alloc_low_page(&pud_phys);
-
+               pud = alloc_low_page(&pud_phys);
                last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
                                                 page_size_mask);
                unmap_low_page(pud);
-               pgd_populate(&init_mm, pgd_offset_k(start),
-                            __va(pud_phys));
+
+               spin_lock(&init_mm.page_table_lock);
+               pgd_populate(&init_mm, pgd, __va(pud_phys));
+               spin_unlock(&init_mm.page_table_lock);
        }
 
        return last_map_addr;
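
Taken together, the init_64.c hunks apply one locking pattern throughout: build a new page-table page with no lock held, then take init_mm.page_table_lock only around the store that publishes it. In schematic form, using the exact calls from the hunks above:

        pmd = alloc_low_page(&pmd_phys);        /* no lock held */
        last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
        unmap_low_page(pmd);

        spin_lock(&init_mm.page_table_lock);    /* short critical section */
        pud_populate(&init_mm, pud, __va(pmd_phys));
        spin_unlock(&init_mm.page_table_lock);

The lock now covers only the set_pte()/populate calls rather than whole phys_pmd_init() loops plus allocation, as the spin_lock removed from phys_pmd_update() shows.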
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 24c1d3c30186..d4b6e6a29ae3 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -170,7 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
        phys_addr &= PAGE_MASK;
        size = PAGE_ALIGN(last_addr+1) - phys_addr;
 
-       retval = reserve_memtype(phys_addr, phys_addr + size,
+       retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
                                                prot_val, &new_prot_val);
        if (retval) {
                pr_debug("Warning: reserve_memtype returned %d\n", retval);
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
        return (void __iomem *)ret;
 }
 
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
+                               unsigned long prot_val)
+{
+       return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
+                               __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
 /**
  * iounmap - Free an IO remapping
  * @addr: virtual address from ioremap_*
@@ -545,13 +553,11 @@ static int __init check_early_ioremap_leak(void)
 {
        if (!early_ioremap_nested)
                return 0;
-
-       printk(KERN_WARNING
+       WARN(1, KERN_WARNING
               "Debug warning: early ioremap leak of %d areas detected.\n",
               early_ioremap_nested);
        printk(KERN_WARNING
              "please boot with early_ioremap_debug and report the dmesg.\n");
-       WARN_ON(1);
 
        return 1;
 }
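
ioremap_prot() exposes __ioremap_caller() with a caller-supplied protection value, masked down to its cache-attribute bits. A hypothetical driver fragment (dev_phys_base, dev_region_size, and the choice of _PAGE_CACHE_UC_MINUS are illustrative, not from this patch):

        void __iomem *regs;

        regs = ioremap_prot(dev_phys_base, dev_region_size,
                            _PAGE_CACHE_UC_MINUS);
        if (!regs)
                return -ENOMEM;

        writel(1, regs);        /* hypothetical device register at offset 0 */
        iounmap(regs);
        return 0;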
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index e7397e108beb..635b50e85581 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -430,7 +430,9 @@ static void enter_uniprocessor(void)
                    "may miss events.\n");
 }
 
-static void leave_uniprocessor(void)
+/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
+   but this whole function is ifdefed CONFIG_HOTPLUG_CPU */
+static void __ref leave_uniprocessor(void)
 {
        int cpu;
        int err;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9782f42dd319..a4dd793d6003 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -23,8 +23,6 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-
 struct memnode memnode;
 
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
@@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
                nodedata_phys + pgdat_size - 1);
 
        memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-       NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
+       NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
        NODE_DATA(nodeid)->node_start_pfn = start_pfn;
        NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
 
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 0dcd42eb94e6..d4aa503caaa2 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -221,8 +221,7 @@ static int pageattr_test(void)
        failed += print_split(&sc);
 
        if (failed) {
-               printk(KERN_ERR "NOT PASSED. Please report.\n");
-               WARN_ON(1);
+               WARN(1, KERN_ERR "NOT PASSED. Please report.\n");
                return -EINVAL;
        } else {
                if (print)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 65c6e46bf059..43e2f8483e4f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -55,13 +55,19 @@ static void split_page_count(int level)
 
 int arch_report_meminfo(char *page)
 {
-       int n = sprintf(page, "DirectMap4k: %8lu\n"
-                       "DirectMap2M: %8lu\n",
-                       direct_pages_count[PG_LEVEL_4K],
-                       direct_pages_count[PG_LEVEL_2M]);
+       int n = sprintf(page, "DirectMap4k: %8lu kB\n",
+                       direct_pages_count[PG_LEVEL_4K] << 2);
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+       n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
+                       direct_pages_count[PG_LEVEL_2M] << 11);
+#else
+       n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
+                       direct_pages_count[PG_LEVEL_2M] << 12);
+#endif
 #ifdef CONFIG_X86_64
-       n += sprintf(page + n, "DirectMap1G: %8lu\n",
-                       direct_pages_count[PG_LEVEL_1G]);
+       if (direct_gbpages)
+               n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
+                       direct_pages_count[PG_LEVEL_1G] << 20);
 #endif
        return n;
 }
@@ -592,10 +598,9 @@ repeat:
        if (!pte_val(old_pte)) {
                if (!primary)
                        return 0;
-               printk(KERN_WARNING "CPA: called for zero pte. "
+               WARN(1, KERN_WARNING "CPA: called for zero pte. "
                       "vaddr = %lx cpa->vaddr = %lx\n", address,
                       cpa->vaddr);
-               WARN_ON(1);
                return -EINVAL;
        }
 
@@ -844,7 +849,7 @@ int set_memory_uc(unsigned long addr, int numpages)
        /*
         * for now UC MINUS. see comments in ioremap_nocache()
         */
-       if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+       if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
                            _PAGE_CACHE_UC_MINUS, NULL))
                return -EINVAL;
 
@@ -863,7 +868,7 @@ int set_memory_wc(unsigned long addr, int numpages)
        if (!pat_enabled)
                return set_memory_uc(addr, numpages);
 
-       if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+       if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
                _PAGE_CACHE_WC, NULL))
                return -EINVAL;
 
@@ -879,7 +884,7 @@ int _set_memory_wb(unsigned long addr, int numpages)
 
 int set_memory_wb(unsigned long addr, int numpages)
 {
-       free_memtype(addr, addr + numpages * PAGE_SIZE);
+       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
 
        return _set_memory_wb(addr, numpages);
 }
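
The shifts in the new arch_report_meminfo() turn page counts into kB, so each shift is the page-size shift minus 10: 4 kB pages use << 2, 2 MB pages << 11 (2048 kB each), 4 MB pages << 12 (4096 kB), and 1 GB pages << 20 (1048576 kB). For example, three 2 MB mappings report as 3 << 11 = 6144 kB.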
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2fe30916d4b6..2a50e0fa64a5 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -207,6 +207,9 @@ static int chk_conflict(struct memtype *new, struct memtype *entry,
        return -EBUSY;
 }
 
+static struct memtype *cached_entry;
+static u64 cached_start;
+
 /*
  * req_type typically has one of the:
  * - _PAGE_CACHE_WB
@@ -280,11 +283,17 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
        spin_lock(&memtype_lock);
 
+       if (cached_entry && start >= cached_start)
+               entry = cached_entry;
+       else
+               entry = list_entry(&memtype_list, struct memtype, nd);
+
        /* Search for existing mapping that overlaps the current range */
        where = NULL;
-       list_for_each_entry(entry, &memtype_list, nd) {
+       list_for_each_entry_continue(entry, &memtype_list, nd) {
                if (end <= entry->start) {
                        where = entry->nd.prev;
+                       cached_entry = list_entry(where, struct memtype, nd);
                        break;
                } else if (start <= entry->start) { /* end > entry->start */
                        err = chk_conflict(new, entry, new_type);
@@ -292,6 +301,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
                                dprintk("Overlap at 0x%Lx-0x%Lx\n",
                                        entry->start, entry->end);
                                where = entry->nd.prev;
+                               cached_entry = list_entry(where,
+                                                       struct memtype, nd);
                        }
                        break;
                } else if (start < entry->end) { /* start > entry->start */
@@ -299,7 +310,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
                        if (!err) {
                                dprintk("Overlap at 0x%Lx-0x%Lx\n",
                                        entry->start, entry->end);
-                               where = &entry->nd;
+                               cached_entry = list_entry(entry->nd.prev,
+                                                       struct memtype, nd);
+
+                               /*
+                                * Move to right position in the linked
+                                * list to add this new entry
+                                */
+                               list_for_each_entry_continue(entry,
+                                                       &memtype_list, nd) {
+                                       if (start <= entry->start) {
+                                               where = entry->nd.prev;
+                                               break;
+                                       }
+                               }
                        }
                        break;
                }
@@ -314,6 +338,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
                return err;
        }
 
+       cached_start = start;
+
        if (where)
                list_add(&new->nd, where);
        else
@@ -343,6 +369,9 @@ int free_memtype(u64 start, u64 end)
        spin_lock(&memtype_lock);
        list_for_each_entry(entry, &memtype_list, nd) {
                if (entry->start == start && entry->end == end) {
+                       if (cached_entry == entry || cached_start == start)
+                               cached_entry = NULL;
+
                        list_del(&entry->nd);
                        kfree(entry);
                        err = 0;
@@ -361,14 +390,6 @@ int free_memtype(u64 start, u64 end)
 }
 
 
-/*
- * /dev/mem mmap interface. The memtype used for mapping varies:
- * - Use UC for mappings with O_SYNC flag
- * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
- *   inherit the memtype from existing mapping.
- * - Else use UC_MINUS memtype (for backward compatibility with existing
- *   X drivers.
- */
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                                unsigned long size, pgprot_t vma_prot)
 {
@@ -406,14 +427,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                                unsigned long size, pgprot_t *vma_prot)
 {
        u64 offset = ((u64) pfn) << PAGE_SHIFT;
-       unsigned long flags = _PAGE_CACHE_UC_MINUS;
+       unsigned long flags = -1;
        int retval;
 
        if (!range_is_allowed(pfn, size))
                return 0;
 
        if (file->f_flags & O_SYNC) {
-               flags = _PAGE_CACHE_UC;
+               flags = _PAGE_CACHE_UC_MINUS;
        }
 
 #ifdef CONFIG_X86_32
@@ -436,13 +457,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 #endif
 
        /*
-        * With O_SYNC, we can only take UC mapping. Fail if we cannot.
+        * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
+        *
         * Without O_SYNC, we want to get
         * - WB for WB-able memory and no other conflicting mappings
         * - UC_MINUS for non-WB-able memory with no other conflicting mappings
         * - Inherit from conflicting mappings otherwise
         */
-       if (flags != _PAGE_CACHE_UC_MINUS) {
+       if (flags != -1) {
                retval = reserve_memtype(offset, offset + size, flags, NULL);
        } else {
                retval = reserve_memtype(offset, offset + size, -1, &flags);
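
The cached_entry/cached_start pair is a one-entry memo over the sorted memtype_list: when reservations arrive in ascending address order, the linear scan resumes from the last hit instead of the list head, and free_memtype() invalidates the memo when the cached node goes away (the new check in the removal path above). A minimal user-space sketch of the same idea, with hypothetical types, not the kernel's struct memtype or list_head:

struct range {
        unsigned long long start;
        struct range *next;
};

static struct range *cached_pos;        /* where the last search ended */
static unsigned long long cached_start;

/* Return the node after which a range starting at 'start' belongs.
 * 'head' is a sentinel node, mirroring the kernel's list head. */
static struct range *find_pos(struct range *head, unsigned long long start)
{
        struct range *r = head;

        if (cached_pos && start >= cached_start)        /* resume the scan */
                r = cached_pos;
        while (r->next && r->next->start < start)
                r = r->next;

        cached_pos = r;
        cached_start = start;
        return r;
}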
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 557b2abceef8..d50302774fe2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
        unsigned long addr;
        int i;
 
+       if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
+               return;
+
        pud = pud_offset(pgd, 0);
 
        for (addr = i = 0; i < PREALLOCATED_PMDS;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index b4becbf8c570..cab0abbd1ebe 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -20,53 +20,6 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
-void show_mem(void)
-{
-       int total = 0, reserved = 0;
-       int shared = 0, cached = 0;
-       int highmem = 0;
-       struct page *page;
-       pg_data_t *pgdat;
-       unsigned long i;
-       unsigned long flags;
-
-       printk(KERN_INFO "Mem-info:\n");
-       show_free_areas();
-       for_each_online_pgdat(pgdat) {
-               pgdat_resize_lock(pgdat, &flags);
-               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-                       if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-                               touch_nmi_watchdog();
-                       page = pgdat_page_nr(pgdat, i);
-                       total++;
-                       if (PageHighMem(page))
-                               highmem++;
-                       if (PageReserved(page))
-                               reserved++;
-                       else if (PageSwapCache(page))
-                               cached++;
-                       else if (page_count(page))
-                               shared += page_count(page) - 1;
-               }
-               pgdat_resize_unlock(pgdat, &flags);
-       }
-       printk(KERN_INFO "%d pages of RAM\n", total);
-       printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
-       printk(KERN_INFO "%d reserved pages\n", reserved);
-       printk(KERN_INFO "%d pages shared\n", shared);
-       printk(KERN_INFO "%d pages swap cached\n", cached);
-
-       printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
-       printk(KERN_INFO "%lu pages writeback\n",
-                                       global_page_state(NR_WRITEBACK));
-       printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
-       printk(KERN_INFO "%lu pages slab\n",
-               global_page_state(NR_SLAB_RECLAIMABLE) +
-               global_page_state(NR_SLAB_UNRECLAIMABLE));
-       printk(KERN_INFO "%lu pages pagetables\n",
-                                       global_page_state(NR_PAGETABLE));
-}
-
 /*
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 1eb2973a301c..16ae70fc57e7 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -178,7 +178,7 @@ void acpi_numa_arch_fixup(void)
  * start of the node, and that the current "end" address is after
  * the previous one.
  */
-static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
+static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
 {
        /*
         * Only add present memory as told by the e820.
@@ -189,10 +189,10 @@ static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
        if (memory_chunk->start_pfn >= max_pfn) {
                printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
                        memory_chunk->start_pfn, memory_chunk->end_pfn);
-               return;
+               return -1;
        }
        if (memory_chunk->nid != nid)
-               return;
+               return -1;
 
        if (!node_has_online_mem(nid))
                node_start_pfn[nid] = memory_chunk->start_pfn;
@@ -202,6 +202,8 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
 
        if (node_end_pfn[nid] < memory_chunk->end_pfn)
                node_end_pfn[nid] = memory_chunk->end_pfn;
+
+       return 0;
 }
 
 int __init get_memcfg_from_srat(void)
@@ -259,7 +261,9 @@ int __init get_memcfg_from_srat(void)
                printk(KERN_DEBUG
                        "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
                        j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
-               node_read_chunk(chunk->nid, chunk);
+               if (node_read_chunk(chunk->nid, chunk))
+                       continue;
+
                e820_register_active_regions(chunk->nid, chunk->start_pfn,
                                        min(chunk->end_pfn, max_pfn));
        }