Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/highmem_32.c    |   9 |
-rw-r--r-- | arch/x86/mm/init.c          | 344 |
-rw-r--r-- | arch/x86/mm/init_32.c       | 256 |
-rw-r--r-- | arch/x86/mm/init_64.c       | 280 |
-rw-r--r-- | arch/x86/mm/ioremap.c       |  35 |
-rw-r--r-- | arch/x86/mm/kmmio.c         | 164 |
-rw-r--r-- | arch/x86/mm/memtest.c       |   3 |
-rw-r--r-- | arch/x86/mm/numa_32.c       |   5 |
-rw-r--r-- | arch/x86/mm/testmmiotrace.c |  70 |
9 files changed, 602 insertions, 564 deletions
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 00f127c80b0e..d11745334a67 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(kunmap); | |||
158 | EXPORT_SYMBOL(kmap_atomic); | 158 | EXPORT_SYMBOL(kmap_atomic); |
159 | EXPORT_SYMBOL(kunmap_atomic); | 159 | EXPORT_SYMBOL(kunmap_atomic); |
160 | 160 | ||
161 | #ifdef CONFIG_NUMA | ||
162 | void __init set_highmem_pages_init(void) | 161 | void __init set_highmem_pages_init(void) |
163 | { | 162 | { |
164 | struct zone *zone; | 163 | struct zone *zone; |
@@ -182,11 +181,3 @@ void __init set_highmem_pages_init(void) | |||
182 | } | 181 | } |
183 | totalram_pages += totalhigh_pages; | 182 | totalram_pages += totalhigh_pages; |
184 | } | 183 | } |
185 | #else | ||
186 | void __init set_highmem_pages_init(void) | ||
187 | { | ||
188 | add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); | ||
189 | |||
190 | totalram_pages += totalhigh_pages; | ||
191 | } | ||
192 | #endif /* CONFIG_NUMA */ | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ce6a722587d8..15219e0d1243 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,8 +1,345 @@ | |||
1 | #include <linux/ioport.h> | ||
1 | #include <linux/swap.h> | 2 | #include <linux/swap.h> |
3 | |||
2 | #include <asm/cacheflush.h> | 4 | #include <asm/cacheflush.h> |
5 | #include <asm/e820.h> | ||
6 | #include <asm/init.h> | ||
3 | #include <asm/page.h> | 7 | #include <asm/page.h> |
8 | #include <asm/page_types.h> | ||
4 | #include <asm/sections.h> | 9 | #include <asm/sections.h> |
5 | #include <asm/system.h> | 10 | #include <asm/system.h> |
11 | #include <asm/tlbflush.h> | ||
12 | |||
13 | unsigned long __initdata e820_table_start; | ||
14 | unsigned long __meminitdata e820_table_end; | ||
15 | unsigned long __meminitdata e820_table_top; | ||
16 | |||
17 | int after_bootmem; | ||
18 | |||
19 | int direct_gbpages | ||
20 | #ifdef CONFIG_DIRECT_GBPAGES | ||
21 | = 1 | ||
22 | #endif | ||
23 | ; | ||
24 | |||
25 | static void __init find_early_table_space(unsigned long end, int use_pse, | ||
26 | int use_gbpages) | ||
27 | { | ||
28 | unsigned long puds, pmds, ptes, tables, start; | ||
29 | |||
30 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | ||
31 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | ||
32 | |||
33 | if (use_gbpages) { | ||
34 | unsigned long extra; | ||
35 | |||
36 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); | ||
37 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; | ||
38 | } else | ||
39 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | ||
40 | |||
41 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | ||
42 | |||
43 | if (use_pse) { | ||
44 | unsigned long extra; | ||
45 | |||
46 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | ||
47 | #ifdef CONFIG_X86_32 | ||
48 | extra += PMD_SIZE; | ||
49 | #endif | ||
50 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
51 | } else | ||
52 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
53 | |||
54 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
55 | |||
56 | #ifdef CONFIG_X86_32 | ||
57 | /* for fixmap */ | ||
58 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | ||
59 | #endif | ||
60 | |||
61 | /* | ||
62 | * RED-PEN putting page tables only on node 0 could | ||
63 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
64 | * need roughly 0.5KB per GB. | ||
65 | */ | ||
66 | #ifdef CONFIG_X86_32 | ||
67 | start = 0x7000; | ||
68 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
69 | tables, PAGE_SIZE); | ||
70 | #else /* CONFIG_X86_64 */ | ||
71 | start = 0x8000; | ||
72 | e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
73 | #endif | ||
74 | if (e820_table_start == -1UL) | ||
75 | panic("Cannot find space for the kernel page tables"); | ||
76 | |||
77 | e820_table_start >>= PAGE_SHIFT; | ||
78 | e820_table_end = e820_table_start; | ||
79 | e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); | ||
80 | |||
81 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
82 | end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); | ||
83 | } | ||
84 | |||
85 | struct map_range { | ||
86 | unsigned long start; | ||
87 | unsigned long end; | ||
88 | unsigned page_size_mask; | ||
89 | }; | ||
90 | |||
91 | #ifdef CONFIG_X86_32 | ||
92 | #define NR_RANGE_MR 3 | ||
93 | #else /* CONFIG_X86_64 */ | ||
94 | #define NR_RANGE_MR 5 | ||
95 | #endif | ||
96 | |||
97 | static int save_mr(struct map_range *mr, int nr_range, | ||
98 | unsigned long start_pfn, unsigned long end_pfn, | ||
99 | unsigned long page_size_mask) | ||
100 | { | ||
101 | if (start_pfn < end_pfn) { | ||
102 | if (nr_range >= NR_RANGE_MR) | ||
103 | panic("run out of range for init_memory_mapping\n"); | ||
104 | mr[nr_range].start = start_pfn<<PAGE_SHIFT; | ||
105 | mr[nr_range].end = end_pfn<<PAGE_SHIFT; | ||
106 | mr[nr_range].page_size_mask = page_size_mask; | ||
107 | nr_range++; | ||
108 | } | ||
109 | |||
110 | return nr_range; | ||
111 | } | ||
112 | |||
113 | #ifdef CONFIG_X86_64 | ||
114 | static void __init init_gbpages(void) | ||
115 | { | ||
116 | if (direct_gbpages && cpu_has_gbpages) | ||
117 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
118 | else | ||
119 | direct_gbpages = 0; | ||
120 | } | ||
121 | #else | ||
122 | static inline void init_gbpages(void) | ||
123 | { | ||
124 | } | ||
125 | #endif | ||
126 | |||
127 | /* | ||
128 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | ||
129 | * This runs before bootmem is initialized and gets pages directly from | ||
130 | * the physical memory. To access them they are temporarily mapped. | ||
131 | */ | ||
132 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
133 | unsigned long end) | ||
134 | { | ||
135 | unsigned long page_size_mask = 0; | ||
136 | unsigned long start_pfn, end_pfn; | ||
137 | unsigned long ret = 0; | ||
138 | unsigned long pos; | ||
139 | |||
140 | struct map_range mr[NR_RANGE_MR]; | ||
141 | int nr_range, i; | ||
142 | int use_pse, use_gbpages; | ||
143 | |||
144 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); | ||
145 | |||
146 | if (!after_bootmem) | ||
147 | init_gbpages(); | ||
148 | |||
149 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
150 | /* | ||
151 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
152 | * This will simplify cpa(), which otherwise needs to support splitting | ||
153 | * large pages into small in interrupt context, etc. | ||
154 | */ | ||
155 | use_pse = use_gbpages = 0; | ||
156 | #else | ||
157 | use_pse = cpu_has_pse; | ||
158 | use_gbpages = direct_gbpages; | ||
159 | #endif | ||
160 | |||
161 | #ifdef CONFIG_X86_32 | ||
162 | #ifdef CONFIG_X86_PAE | ||
163 | set_nx(); | ||
164 | if (nx_enabled) | ||
165 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
166 | #endif | ||
167 | |||
168 | /* Enable PSE if available */ | ||
169 | if (cpu_has_pse) | ||
170 | set_in_cr4(X86_CR4_PSE); | ||
171 | |||
172 | /* Enable PGE if available */ | ||
173 | if (cpu_has_pge) { | ||
174 | set_in_cr4(X86_CR4_PGE); | ||
175 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
176 | } | ||
177 | #endif | ||
178 | |||
179 | if (use_gbpages) | ||
180 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
181 | if (use_pse) | ||
182 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
183 | |||
184 | memset(mr, 0, sizeof(mr)); | ||
185 | nr_range = 0; | ||
186 | |||
187 | /* head if not big page alignment ? */ | ||
188 | start_pfn = start >> PAGE_SHIFT; | ||
189 | pos = start_pfn << PAGE_SHIFT; | ||
190 | #ifdef CONFIG_X86_32 | ||
191 | /* | ||
192 | * Don't use a large page for the first 2/4MB of memory | ||
193 | * because there are often fixed size MTRRs in there | ||
194 | * and overlapping MTRRs into large pages can cause | ||
195 | * slowdowns. | ||
196 | */ | ||
197 | if (pos == 0) | ||
198 | end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); | ||
199 | else | ||
200 | end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
201 | << (PMD_SHIFT - PAGE_SHIFT); | ||
202 | #else /* CONFIG_X86_64 */ | ||
203 | end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) | ||
204 | << (PMD_SHIFT - PAGE_SHIFT); | ||
205 | #endif | ||
206 | if (end_pfn > (end >> PAGE_SHIFT)) | ||
207 | end_pfn = end >> PAGE_SHIFT; | ||
208 | if (start_pfn < end_pfn) { | ||
209 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
210 | pos = end_pfn << PAGE_SHIFT; | ||
211 | } | ||
212 | |||
213 | /* big page (2M) range */ | ||
214 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
215 | << (PMD_SHIFT - PAGE_SHIFT); | ||
216 | #ifdef CONFIG_X86_32 | ||
217 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
218 | #else /* CONFIG_X86_64 */ | ||
219 | end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
220 | << (PUD_SHIFT - PAGE_SHIFT); | ||
221 | if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) | ||
222 | end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); | ||
223 | #endif | ||
224 | |||
225 | if (start_pfn < end_pfn) { | ||
226 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
227 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
228 | pos = end_pfn << PAGE_SHIFT; | ||
229 | } | ||
230 | |||
231 | #ifdef CONFIG_X86_64 | ||
232 | /* big page (1G) range */ | ||
233 | start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
234 | << (PUD_SHIFT - PAGE_SHIFT); | ||
235 | end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
236 | if (start_pfn < end_pfn) { | ||
237 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
238 | page_size_mask & | ||
239 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | ||
240 | pos = end_pfn << PAGE_SHIFT; | ||
241 | } | ||
242 | |||
243 | /* tail is not big page (1G) alignment */ | ||
244 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
245 | << (PMD_SHIFT - PAGE_SHIFT); | ||
246 | end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
247 | if (start_pfn < end_pfn) { | ||
248 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
249 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
250 | pos = end_pfn << PAGE_SHIFT; | ||
251 | } | ||
252 | #endif | ||
253 | |||
254 | /* tail is not big page (2M) alignment */ | ||
255 | start_pfn = pos>>PAGE_SHIFT; | ||
256 | end_pfn = end>>PAGE_SHIFT; | ||
257 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
258 | |||
259 | /* try to merge same page size and continuous */ | ||
260 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | ||
261 | unsigned long old_start; | ||
262 | if (mr[i].end != mr[i+1].start || | ||
263 | mr[i].page_size_mask != mr[i+1].page_size_mask) | ||
264 | continue; | ||
265 | /* move it */ | ||
266 | old_start = mr[i].start; | ||
267 | memmove(&mr[i], &mr[i+1], | ||
268 | (nr_range - 1 - i) * sizeof(struct map_range)); | ||
269 | mr[i--].start = old_start; | ||
270 | nr_range--; | ||
271 | } | ||
272 | |||
273 | for (i = 0; i < nr_range; i++) | ||
274 | printk(KERN_DEBUG " %010lx - %010lx page %s\n", | ||
275 | mr[i].start, mr[i].end, | ||
276 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | ||
277 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | ||
278 | |||
279 | /* | ||
280 | * Find space for the kernel direct mapping tables. | ||
281 | * | ||
282 | * Later we should allocate these tables in the local node of the | ||
283 | * memory mapped. Unfortunately this is done currently before the | ||
284 | * nodes are discovered. | ||
285 | */ | ||
286 | if (!after_bootmem) | ||
287 | find_early_table_space(end, use_pse, use_gbpages); | ||
288 | |||
289 | #ifdef CONFIG_X86_32 | ||
290 | for (i = 0; i < nr_range; i++) | ||
291 | kernel_physical_mapping_init(mr[i].start, mr[i].end, | ||
292 | mr[i].page_size_mask); | ||
293 | ret = end; | ||
294 | #else /* CONFIG_X86_64 */ | ||
295 | for (i = 0; i < nr_range; i++) | ||
296 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | ||
297 | mr[i].page_size_mask); | ||
298 | #endif | ||
299 | |||
300 | #ifdef CONFIG_X86_32 | ||
301 | early_ioremap_page_table_range_init(); | ||
302 | |||
303 | load_cr3(swapper_pg_dir); | ||
304 | #endif | ||
305 | |||
306 | #ifdef CONFIG_X86_64 | ||
307 | if (!after_bootmem) | ||
308 | mmu_cr4_features = read_cr4(); | ||
309 | #endif | ||
310 | __flush_tlb_all(); | ||
311 | |||
312 | if (!after_bootmem && e820_table_end > e820_table_start) | ||
313 | reserve_early(e820_table_start << PAGE_SHIFT, | ||
314 | e820_table_end << PAGE_SHIFT, "PGTABLE"); | ||
315 | |||
316 | if (!after_bootmem) | ||
317 | early_memtest(start, end); | ||
318 | |||
319 | return ret >> PAGE_SHIFT; | ||
320 | } | ||
321 | |||
322 | |||
323 | /* | ||
324 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
325 | * is valid. The argument is a physical page number. | ||
326 | * | ||
327 | * | ||
328 | * On x86, access has to be given to the first megabyte of ram because that area | ||
329 | * contains bios code and data regions used by X and dosemu and similar apps. | ||
330 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | ||
331 | * mmio resources as well as potential bios/acpi data regions. | ||
332 | */ | ||
333 | int devmem_is_allowed(unsigned long pagenr) | ||
334 | { | ||
335 | if (pagenr <= 256) | ||
336 | return 1; | ||
337 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
338 | return 0; | ||
339 | if (!page_is_ram(pagenr)) | ||
340 | return 1; | ||
341 | return 0; | ||
342 | } | ||
6 | 343 | ||
7 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 344 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
8 | { | 345 | { |
@@ -47,3 +384,10 @@ void free_initmem(void) | |||
47 | (unsigned long)(&__init_begin), | 384 | (unsigned long)(&__init_begin), |
48 | (unsigned long)(&__init_end)); | 385 | (unsigned long)(&__init_end)); |
49 | } | 386 | } |
387 | |||
388 | #ifdef CONFIG_BLK_DEV_INITRD | ||
389 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
390 | { | ||
391 | free_init_pages("initrd memory", start, end); | ||
392 | } | ||
393 | #endif | ||
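The heart of the new arch/x86/mm/init.c is the range-splitting logic in the now-shared init_memory_mapping(): the requested physical range is broken into a 4k-mapped head up to the first 2M boundary, a 2M-aligned middle (on 64-bit additionally a 1G-aligned part), and a 4k-mapped tail, after which adjacent ranges with the same page_size_mask are merged. The following stand-alone sketch is not kernel code — the constants, the example range and the three-entry limit are chosen for illustration, and both the 32-bit "no large pages below 2/4 MB" special case and the merge pass are omitted — but it reproduces the same splitting arithmetic:

/*
 * Stand-alone illustration (not kernel code) of how init_memory_mapping()
 * splits a physical range into map_range entries: a 4k head up to the
 * first 2M boundary, a run of whole 2M pages, and a 4k tail.  Constants
 * mirror x86 with 4k pages; the example range is arbitrary.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define NR_RANGE	3	/* like the 32-bit NR_RANGE_MR */

struct map_range {
	unsigned long start, end;	/* byte addresses */
	int use_2m;			/* mapped with 2M pages? */
};

static int save_mr(struct map_range *mr, int nr, unsigned long s_pfn,
		   unsigned long e_pfn, int use_2m)
{
	if (s_pfn < e_pfn && nr < NR_RANGE) {
		mr[nr].start = s_pfn << PAGE_SHIFT;
		mr[nr].end   = e_pfn << PAGE_SHIFT;
		mr[nr].use_2m = use_2m;
		nr++;
	}
	return nr;
}

int main(void)
{
	/* example: 1 MB .. 48 MB + 12 kB, neither end 2M-aligned */
	unsigned long start = 0x100000, end = 0x3003000;
	unsigned long pos = start, start_pfn, end_pfn;
	struct map_range mr[NR_RANGE];
	int nr = 0, i;

	/* head: 4k pages up to the first 2M boundary */
	start_pfn = pos >> PAGE_SHIFT;
	end_pfn = ((pos + PMD_SIZE - 1) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	if (end_pfn > (end >> PAGE_SHIFT))
		end_pfn = end >> PAGE_SHIFT;
	nr = save_mr(mr, nr, start_pfn, end_pfn, 0);
	if (start_pfn < end_pfn)
		pos = end_pfn << PAGE_SHIFT;

	/* middle: whole 2M pages */
	start_pfn = ((pos + PMD_SIZE - 1) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	nr = save_mr(mr, nr, start_pfn, end_pfn, 1);
	if (start_pfn < end_pfn)
		pos = end_pfn << PAGE_SHIFT;

	/* tail: remaining 4k pages */
	nr = save_mr(mr, nr, pos >> PAGE_SHIFT, end >> PAGE_SHIFT, 0);

	for (i = 0; i < nr; i++)
		printf(" %010lx - %010lx page %s\n",
		       mr[i].start, mr[i].end, mr[i].use_2m ? "2M" : "4k");
	return 0;
}

Run, this prints a 4k head from 1 MB to 2 MB, a 2M-mapped middle up to 48 MB and a 4k tail of three pages — the same shape as the " %010lx - %010lx page %s" lines the patch emits at KERN_DEBUG level.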
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 47df0e1bbeb9..db81e9a8556b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,6 +49,7 @@ | |||
49 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
50 | #include <asm/setup.h> | 50 | #include <asm/setup.h> |
51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
52 | #include <asm/init.h> | ||
52 | 53 | ||
53 | unsigned long max_low_pfn_mapped; | 54 | unsigned long max_low_pfn_mapped; |
54 | unsigned long max_pfn_mapped; | 55 | unsigned long max_pfn_mapped; |
@@ -58,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn; | |||
58 | 59 | ||
59 | static noinline int do_test_wp_bit(void); | 60 | static noinline int do_test_wp_bit(void); |
60 | 61 | ||
61 | 62 | bool __read_mostly __vmalloc_start_set = false; | |
62 | static unsigned long __initdata table_start; | ||
63 | static unsigned long __meminitdata table_end; | ||
64 | static unsigned long __meminitdata table_top; | ||
65 | |||
66 | static int __initdata after_init_bootmem; | ||
67 | 63 | ||
68 | static __init void *alloc_low_page(void) | 64 | static __init void *alloc_low_page(void) |
69 | { | 65 | { |
70 | unsigned long pfn = table_end++; | 66 | unsigned long pfn = e820_table_end++; |
71 | void *adr; | 67 | void *adr; |
72 | 68 | ||
73 | if (pfn >= table_top) | 69 | if (pfn >= e820_table_top) |
74 | panic("alloc_low_page: ran out of memory"); | 70 | panic("alloc_low_page: ran out of memory"); |
75 | 71 | ||
76 | adr = __va(pfn * PAGE_SIZE); | 72 | adr = __va(pfn * PAGE_SIZE); |
@@ -90,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
90 | 86 | ||
91 | #ifdef CONFIG_X86_PAE | 87 | #ifdef CONFIG_X86_PAE |
92 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 88 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
93 | if (after_init_bootmem) | 89 | if (after_bootmem) |
94 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 90 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
95 | else | 91 | else |
96 | pmd_table = (pmd_t *)alloc_low_page(); | 92 | pmd_table = (pmd_t *)alloc_low_page(); |
@@ -117,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
117 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { | 113 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
118 | pte_t *page_table = NULL; | 114 | pte_t *page_table = NULL; |
119 | 115 | ||
120 | if (after_init_bootmem) { | 116 | if (after_bootmem) { |
121 | #ifdef CONFIG_DEBUG_PAGEALLOC | 117 | #ifdef CONFIG_DEBUG_PAGEALLOC |
122 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 118 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
123 | #endif | 119 | #endif |
@@ -168,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | |||
168 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end | 164 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end |
169 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | 165 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin |
170 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | 166 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end |
171 | && ((__pa(pte) >> PAGE_SHIFT) < table_start | 167 | && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start |
172 | || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { | 168 | || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { |
173 | pte_t *newpte; | 169 | pte_t *newpte; |
174 | int i; | 170 | int i; |
175 | 171 | ||
176 | BUG_ON(after_init_bootmem); | 172 | BUG_ON(after_bootmem); |
177 | newpte = alloc_low_page(); | 173 | newpte = alloc_low_page(); |
178 | for (i = 0; i < PTRS_PER_PTE; i++) | 174 | for (i = 0; i < PTRS_PER_PTE; i++) |
179 | set_pte(newpte + i, pte[i]); | 175 | set_pte(newpte + i, pte[i]); |
@@ -242,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr) | |||
242 | * of max_low_pfn pages, by creating page tables starting from address | 238 | * of max_low_pfn pages, by creating page tables starting from address |
243 | * PAGE_OFFSET: | 239 | * PAGE_OFFSET: |
244 | */ | 240 | */ |
245 | static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | 241 | unsigned long __init |
246 | unsigned long start_pfn, | 242 | kernel_physical_mapping_init(unsigned long start, |
247 | unsigned long end_pfn, | 243 | unsigned long end, |
248 | int use_pse) | 244 | unsigned long page_size_mask) |
249 | { | 245 | { |
246 | int use_pse = page_size_mask == (1<<PG_LEVEL_2M); | ||
247 | unsigned long start_pfn, end_pfn; | ||
248 | pgd_t *pgd_base = swapper_pg_dir; | ||
250 | int pgd_idx, pmd_idx, pte_ofs; | 249 | int pgd_idx, pmd_idx, pte_ofs; |
251 | unsigned long pfn; | 250 | unsigned long pfn; |
252 | pgd_t *pgd; | 251 | pgd_t *pgd; |
@@ -255,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | |||
255 | unsigned pages_2m, pages_4k; | 254 | unsigned pages_2m, pages_4k; |
256 | int mapping_iter; | 255 | int mapping_iter; |
257 | 256 | ||
257 | start_pfn = start >> PAGE_SHIFT; | ||
258 | end_pfn = end >> PAGE_SHIFT; | ||
259 | |||
258 | /* | 260 | /* |
259 | * First iteration will setup identity mapping using large/small pages | 261 | * First iteration will setup identity mapping using large/small pages |
260 | * based on use_pse, with other attributes same as set by | 262 | * based on use_pse, with other attributes same as set by |
@@ -369,26 +371,6 @@ repeat: | |||
369 | mapping_iter = 2; | 371 | mapping_iter = 2; |
370 | goto repeat; | 372 | goto repeat; |
371 | } | 373 | } |
372 | } | ||
373 | |||
374 | /* | ||
375 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
376 | * is valid. The argument is a physical page number. | ||
377 | * | ||
378 | * | ||
379 | * On x86, access has to be given to the first megabyte of ram because that area | ||
380 | * contains bios code and data regions used by X and dosemu and similar apps. | ||
381 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | ||
382 | * mmio resources as well as potential bios/acpi data regions. | ||
383 | */ | ||
384 | int devmem_is_allowed(unsigned long pagenr) | ||
385 | { | ||
386 | if (pagenr <= 256) | ||
387 | return 1; | ||
388 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
389 | return 0; | ||
390 | if (!page_is_ram(pagenr)) | ||
391 | return 1; | ||
392 | return 0; | 374 | return 0; |
393 | } | 375 | } |
394 | 376 | ||
@@ -545,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base) | |||
545 | * be partially populated, and so it avoids stomping on any existing | 527 | * be partially populated, and so it avoids stomping on any existing |
546 | * mappings. | 528 | * mappings. |
547 | */ | 529 | */ |
548 | static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) | 530 | void __init early_ioremap_page_table_range_init(void) |
549 | { | 531 | { |
532 | pgd_t *pgd_base = swapper_pg_dir; | ||
550 | unsigned long vaddr, end; | 533 | unsigned long vaddr, end; |
551 | 534 | ||
552 | /* | 535 | /* |
@@ -641,7 +624,7 @@ static int __init noexec_setup(char *str) | |||
641 | } | 624 | } |
642 | early_param("noexec", noexec_setup); | 625 | early_param("noexec", noexec_setup); |
643 | 626 | ||
644 | static void __init set_nx(void) | 627 | void __init set_nx(void) |
645 | { | 628 | { |
646 | unsigned int v[4], l, h; | 629 | unsigned int v[4], l, h; |
647 | 630 | ||
@@ -793,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn, | |||
793 | #ifdef CONFIG_FLATMEM | 776 | #ifdef CONFIG_FLATMEM |
794 | max_mapnr = num_physpages; | 777 | max_mapnr = num_physpages; |
795 | #endif | 778 | #endif |
779 | __vmalloc_start_set = true; | ||
780 | |||
796 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | 781 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", |
797 | pages_to_mb(max_low_pfn)); | 782 | pages_to_mb(max_low_pfn)); |
798 | 783 | ||
@@ -814,176 +799,66 @@ static void __init zone_sizes_init(void) | |||
814 | free_area_init_nodes(max_zone_pfns); | 799 | free_area_init_nodes(max_zone_pfns); |
815 | } | 800 | } |
816 | 801 | ||
802 | static unsigned long __init setup_node_bootmem(int nodeid, | ||
803 | unsigned long start_pfn, | ||
804 | unsigned long end_pfn, | ||
805 | unsigned long bootmap) | ||
806 | { | ||
807 | unsigned long bootmap_size; | ||
808 | |||
809 | /* don't touch min_low_pfn */ | ||
810 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | ||
811 | bootmap >> PAGE_SHIFT, | ||
812 | start_pfn, end_pfn); | ||
813 | printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", | ||
814 | nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
815 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | ||
816 | nodeid, bootmap, bootmap + bootmap_size); | ||
817 | free_bootmem_with_active_regions(nodeid, end_pfn); | ||
818 | early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
819 | |||
820 | return bootmap + bootmap_size; | ||
821 | } | ||
822 | |||
817 | void __init setup_bootmem_allocator(void) | 823 | void __init setup_bootmem_allocator(void) |
818 | { | 824 | { |
819 | int i; | 825 | int nodeid; |
820 | unsigned long bootmap_size, bootmap; | 826 | unsigned long bootmap_size, bootmap; |
821 | /* | 827 | /* |
822 | * Initialize the boot-time allocator (with low memory only): | 828 | * Initialize the boot-time allocator (with low memory only): |
823 | */ | 829 | */ |
824 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; | 830 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; |
825 | bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, | 831 | bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size, |
826 | max_pfn_mapped<<PAGE_SHIFT, bootmap_size, | ||
827 | PAGE_SIZE); | 832 | PAGE_SIZE); |
828 | if (bootmap == -1L) | 833 | if (bootmap == -1L) |
829 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 834 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
830 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 835 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
831 | 836 | ||
832 | /* don't touch min_low_pfn */ | ||
833 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | ||
834 | min_low_pfn, max_low_pfn); | ||
835 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 837 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
836 | max_pfn_mapped<<PAGE_SHIFT); | 838 | max_pfn_mapped<<PAGE_SHIFT); |
837 | printk(KERN_INFO " low ram: %08lx - %08lx\n", | 839 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
838 | min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); | ||
839 | printk(KERN_INFO " bootmap %08lx - %08lx\n", | ||
840 | bootmap, bootmap + bootmap_size); | ||
841 | for_each_online_node(i) | ||
842 | free_bootmem_with_active_regions(i, max_low_pfn); | ||
843 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
844 | |||
845 | after_init_bootmem = 1; | ||
846 | } | ||
847 | |||
848 | static void __init find_early_table_space(unsigned long end, int use_pse) | ||
849 | { | ||
850 | unsigned long puds, pmds, ptes, tables, start; | ||
851 | 840 | ||
852 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 841 | for_each_online_node(nodeid) { |
853 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | 842 | unsigned long start_pfn, end_pfn; |
854 | 843 | ||
855 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | 844 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
856 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | 845 | start_pfn = node_start_pfn[nodeid]; |
857 | 846 | end_pfn = node_end_pfn[nodeid]; | |
858 | if (use_pse) { | 847 | if (start_pfn > max_low_pfn) |
859 | unsigned long extra; | 848 | continue; |
860 | 849 | if (end_pfn > max_low_pfn) | |
861 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | 850 | end_pfn = max_low_pfn; |
862 | extra += PMD_SIZE; | ||
863 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
864 | } else | ||
865 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
866 | |||
867 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
868 | |||
869 | /* for fixmap */ | ||
870 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | ||
871 | |||
872 | /* | ||
873 | * RED-PEN putting page tables only on node 0 could | ||
874 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
875 | * need roughly 0.5KB per GB. | ||
876 | */ | ||
877 | start = 0x7000; | ||
878 | table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
879 | tables, PAGE_SIZE); | ||
880 | if (table_start == -1UL) | ||
881 | panic("Cannot find space for the kernel page tables"); | ||
882 | |||
883 | table_start >>= PAGE_SHIFT; | ||
884 | table_end = table_start; | ||
885 | table_top = table_start + (tables>>PAGE_SHIFT); | ||
886 | |||
887 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
888 | end, table_start << PAGE_SHIFT, | ||
889 | (table_start << PAGE_SHIFT) + tables); | ||
890 | } | ||
891 | |||
892 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
893 | unsigned long end) | ||
894 | { | ||
895 | pgd_t *pgd_base = swapper_pg_dir; | ||
896 | unsigned long start_pfn, end_pfn; | ||
897 | unsigned long big_page_start; | ||
898 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
899 | /* | ||
900 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
901 | * This will simplify cpa(), which otherwise needs to support splitting | ||
902 | * large pages into small in interrupt context, etc. | ||
903 | */ | ||
904 | int use_pse = 0; | ||
905 | #else | 851 | #else |
906 | int use_pse = cpu_has_pse; | 852 | start_pfn = 0; |
853 | end_pfn = max_low_pfn; | ||
907 | #endif | 854 | #endif |
908 | 855 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | |
909 | /* | 856 | bootmap); |
910 | * Find space for the kernel direct mapping tables. | ||
911 | */ | ||
912 | if (!after_init_bootmem) | ||
913 | find_early_table_space(end, use_pse); | ||
914 | |||
915 | #ifdef CONFIG_X86_PAE | ||
916 | set_nx(); | ||
917 | if (nx_enabled) | ||
918 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
919 | #endif | ||
920 | |||
921 | /* Enable PSE if available */ | ||
922 | if (cpu_has_pse) | ||
923 | set_in_cr4(X86_CR4_PSE); | ||
924 | |||
925 | /* Enable PGE if available */ | ||
926 | if (cpu_has_pge) { | ||
927 | set_in_cr4(X86_CR4_PGE); | ||
928 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
929 | } | ||
930 | |||
931 | /* | ||
932 | * Don't use a large page for the first 2/4MB of memory | ||
933 | * because there are often fixed size MTRRs in there | ||
934 | * and overlapping MTRRs into large pages can cause | ||
935 | * slowdowns. | ||
936 | */ | ||
937 | big_page_start = PMD_SIZE; | ||
938 | |||
939 | if (start < big_page_start) { | ||
940 | start_pfn = start >> PAGE_SHIFT; | ||
941 | end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); | ||
942 | } else { | ||
943 | /* head is not big page alignment ? */ | ||
944 | start_pfn = start >> PAGE_SHIFT; | ||
945 | end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
946 | << (PMD_SHIFT - PAGE_SHIFT); | ||
947 | } | 857 | } |
948 | if (start_pfn < end_pfn) | ||
949 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); | ||
950 | |||
951 | /* big page range */ | ||
952 | start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
953 | << (PMD_SHIFT - PAGE_SHIFT); | ||
954 | if (start_pfn < (big_page_start >> PAGE_SHIFT)) | ||
955 | start_pfn = big_page_start >> PAGE_SHIFT; | ||
956 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
957 | if (start_pfn < end_pfn) | ||
958 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, | ||
959 | use_pse); | ||
960 | |||
961 | /* tail is not big page alignment ? */ | ||
962 | start_pfn = end_pfn; | ||
963 | if (start_pfn > (big_page_start>>PAGE_SHIFT)) { | ||
964 | end_pfn = end >> PAGE_SHIFT; | ||
965 | if (start_pfn < end_pfn) | ||
966 | kernel_physical_mapping_init(pgd_base, start_pfn, | ||
967 | end_pfn, 0); | ||
968 | } | ||
969 | |||
970 | early_ioremap_page_table_range_init(pgd_base); | ||
971 | 858 | ||
972 | load_cr3(swapper_pg_dir); | 859 | after_bootmem = 1; |
973 | |||
974 | __flush_tlb_all(); | ||
975 | |||
976 | if (!after_init_bootmem) | ||
977 | reserve_early(table_start << PAGE_SHIFT, | ||
978 | table_end << PAGE_SHIFT, "PGTABLE"); | ||
979 | |||
980 | if (!after_init_bootmem) | ||
981 | early_memtest(start, end); | ||
982 | |||
983 | return end >> PAGE_SHIFT; | ||
984 | } | 860 | } |
985 | 861 | ||
986 | |||
987 | /* | 862 | /* |
988 | * paging_init() sets up the page tables - note that the first 8MB are | 863 | * paging_init() sets up the page tables - note that the first 8MB are |
989 | * already mapped by head.S. | 864 | * already mapped by head.S. |
@@ -1217,13 +1092,6 @@ void mark_rodata_ro(void) | |||
1217 | } | 1092 | } |
1218 | #endif | 1093 | #endif |
1219 | 1094 | ||
1220 | #ifdef CONFIG_BLK_DEV_INITRD | ||
1221 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
1222 | { | ||
1223 | free_init_pages("initrd memory", start, end); | ||
1224 | } | ||
1225 | #endif | ||
1226 | |||
1227 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | 1095 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, |
1228 | int flags) | 1096 | int flags) |
1229 | { | 1097 | { |
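On the 32-bit side, the other visible change is that setup_bootmem_allocator() now sets up bootmem for every online node rather than only for node 0, clamping each node's pfn range to low memory before handing it to the new setup_node_bootmem() helper. Below is a toy, user-space model of just that clamping step — the node layout, the ~896 MB lowmem limit and the printfs are invented for the example; the real code goes on to call init_bootmem_node(), free_bootmem_with_active_regions() and early_res_to_bootmem():

/*
 * User-space toy model (invented node layout) of the clamping in the
 * reworked setup_bootmem_allocator() loop: bootmem is set up for every
 * online node, but only for the part of the node that lies in lowmem.
 */
#include <stdio.h>

struct node_range { unsigned long start_pfn, end_pfn; };

int main(void)
{
	struct node_range nodes[] = {
		{ 0x00000, 0x20000 },	/* node 0: 0 - 512 MB             */
		{ 0x20000, 0x40000 },	/* node 1: 512 MB - 1 GB          */
		{ 0x40000, 0x80000 },	/* node 2: 1 - 2 GB, highmem only */
	};
	unsigned long max_low_pfn = 0x38000;	/* ~896 MB of lowmem */
	int nid;

	for (nid = 0; nid < 3; nid++) {
		unsigned long start_pfn = nodes[nid].start_pfn;
		unsigned long end_pfn = nodes[nid].end_pfn;

		if (start_pfn > max_low_pfn) {
			printf("node %d: entirely highmem, skipped\n", nid);
			continue;
		}
		if (end_pfn > max_low_pfn)
			end_pfn = max_low_pfn;
		printf("node %d: bootmem covers pfns %05lx - %05lx\n",
		       nid, start_pfn, end_pfn);
	}
	return 0;
}

With this layout node 0 is covered as-is, node 1 is truncated at the lowmem boundary, and node 2 is skipped entirely.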
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 07f44d491df1..54efa57d1c03 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/kdebug.h> | 48 | #include <asm/kdebug.h> |
49 | #include <asm/numa.h> | 49 | #include <asm/numa.h> |
50 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 54 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata; | |||
61 | 62 | ||
62 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
63 | 64 | ||
64 | int direct_gbpages | ||
65 | #ifdef CONFIG_DIRECT_GBPAGES | ||
66 | = 1 | ||
67 | #endif | ||
68 | ; | ||
69 | |||
70 | static int __init parse_direct_gbpages_off(char *arg) | 65 | static int __init parse_direct_gbpages_off(char *arg) |
71 | { | 66 | { |
72 | direct_gbpages = 0; | 67 | direct_gbpages = 0; |
@@ -87,12 +82,10 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
87 | * around without checking the pgd every time. | 82 | * around without checking the pgd every time. |
88 | */ | 83 | */ |
89 | 84 | ||
90 | int after_bootmem; | ||
91 | |||
92 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; | 85 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; |
93 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 86 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
94 | 87 | ||
95 | static int do_not_nx __cpuinitdata; | 88 | static int disable_nx __cpuinitdata; |
96 | 89 | ||
97 | /* | 90 | /* |
98 | * noexec=on|off | 91 | * noexec=on|off |
@@ -107,9 +100,9 @@ static int __init nonx_setup(char *str) | |||
107 | return -EINVAL; | 100 | return -EINVAL; |
108 | if (!strncmp(str, "on", 2)) { | 101 | if (!strncmp(str, "on", 2)) { |
109 | __supported_pte_mask |= _PAGE_NX; | 102 | __supported_pte_mask |= _PAGE_NX; |
110 | do_not_nx = 0; | 103 | disable_nx = 0; |
111 | } else if (!strncmp(str, "off", 3)) { | 104 | } else if (!strncmp(str, "off", 3)) { |
112 | do_not_nx = 1; | 105 | disable_nx = 1; |
113 | __supported_pte_mask &= ~_PAGE_NX; | 106 | __supported_pte_mask &= ~_PAGE_NX; |
114 | } | 107 | } |
115 | return 0; | 108 | return 0; |
@@ -121,7 +114,7 @@ void __cpuinit check_efer(void) | |||
121 | unsigned long efer; | 114 | unsigned long efer; |
122 | 115 | ||
123 | rdmsrl(MSR_EFER, efer); | 116 | rdmsrl(MSR_EFER, efer); |
124 | if (!(efer & EFER_NX) || do_not_nx) | 117 | if (!(efer & EFER_NX) || disable_nx) |
125 | __supported_pte_mask &= ~_PAGE_NX; | 118 | __supported_pte_mask &= ~_PAGE_NX; |
126 | } | 119 | } |
127 | 120 | ||
@@ -325,13 +318,9 @@ void __init cleanup_highmap(void) | |||
325 | } | 318 | } |
326 | } | 319 | } |
327 | 320 | ||
328 | static unsigned long __initdata table_start; | ||
329 | static unsigned long __meminitdata table_end; | ||
330 | static unsigned long __meminitdata table_top; | ||
331 | |||
332 | static __ref void *alloc_low_page(unsigned long *phys) | 321 | static __ref void *alloc_low_page(unsigned long *phys) |
333 | { | 322 | { |
334 | unsigned long pfn = table_end++; | 323 | unsigned long pfn = e820_table_end++; |
335 | void *adr; | 324 | void *adr; |
336 | 325 | ||
337 | if (after_bootmem) { | 326 | if (after_bootmem) { |
@@ -341,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
341 | return adr; | 330 | return adr; |
342 | } | 331 | } |
343 | 332 | ||
344 | if (pfn >= table_top) | 333 | if (pfn >= e820_table_top) |
345 | panic("alloc_low_page: ran out of memory"); | 334 | panic("alloc_low_page: ran out of memory"); |
346 | 335 | ||
347 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); | 336 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); |
@@ -581,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
581 | return phys_pud_init(pud, addr, end, page_size_mask); | 570 | return phys_pud_init(pud, addr, end, page_size_mask); |
582 | } | 571 | } |
583 | 572 | ||
584 | static void __init find_early_table_space(unsigned long end, int use_pse, | 573 | unsigned long __init |
585 | int use_gbpages) | 574 | kernel_physical_mapping_init(unsigned long start, |
586 | { | 575 | unsigned long end, |
587 | unsigned long puds, pmds, ptes, tables, start; | 576 | unsigned long page_size_mask) |
588 | |||
589 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | ||
590 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | ||
591 | if (use_gbpages) { | ||
592 | unsigned long extra; | ||
593 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); | ||
594 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; | ||
595 | } else | ||
596 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | ||
597 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | ||
598 | |||
599 | if (use_pse) { | ||
600 | unsigned long extra; | ||
601 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | ||
602 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
603 | } else | ||
604 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
605 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
606 | |||
607 | /* | ||
608 | * RED-PEN putting page tables only on node 0 could | ||
609 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
610 | * need roughly 0.5KB per GB. | ||
611 | */ | ||
612 | start = 0x8000; | ||
613 | table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
614 | if (table_start == -1UL) | ||
615 | panic("Cannot find space for the kernel page tables"); | ||
616 | |||
617 | table_start >>= PAGE_SHIFT; | ||
618 | table_end = table_start; | ||
619 | table_top = table_start + (tables >> PAGE_SHIFT); | ||
620 | |||
621 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
622 | end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); | ||
623 | } | ||
624 | |||
625 | static void __init init_gbpages(void) | ||
626 | { | ||
627 | if (direct_gbpages && cpu_has_gbpages) | ||
628 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
629 | else | ||
630 | direct_gbpages = 0; | ||
631 | } | ||
632 | |||
633 | static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, | ||
634 | unsigned long end, | ||
635 | unsigned long page_size_mask) | ||
636 | { | 577 | { |
637 | 578 | ||
638 | unsigned long next, last_map_addr = end; | 579 | unsigned long next, last_map_addr = end; |
@@ -669,176 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, | |||
669 | return last_map_addr; | 610 | return last_map_addr; |
670 | } | 611 | } |
671 | 612 | ||
672 | struct map_range { | ||
673 | unsigned long start; | ||
674 | unsigned long end; | ||
675 | unsigned page_size_mask; | ||
676 | }; | ||
677 | |||
678 | #define NR_RANGE_MR 5 | ||
679 | |||
680 | static int save_mr(struct map_range *mr, int nr_range, | ||
681 | unsigned long start_pfn, unsigned long end_pfn, | ||
682 | unsigned long page_size_mask) | ||
683 | { | ||
684 | |||
685 | if (start_pfn < end_pfn) { | ||
686 | if (nr_range >= NR_RANGE_MR) | ||
687 | panic("run out of range for init_memory_mapping\n"); | ||
688 | mr[nr_range].start = start_pfn<<PAGE_SHIFT; | ||
689 | mr[nr_range].end = end_pfn<<PAGE_SHIFT; | ||
690 | mr[nr_range].page_size_mask = page_size_mask; | ||
691 | nr_range++; | ||
692 | } | ||
693 | |||
694 | return nr_range; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | ||
699 | * This runs before bootmem is initialized and gets pages directly from | ||
700 | * the physical memory. To access them they are temporarily mapped. | ||
701 | */ | ||
702 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
703 | unsigned long end) | ||
704 | { | ||
705 | unsigned long last_map_addr = 0; | ||
706 | unsigned long page_size_mask = 0; | ||
707 | unsigned long start_pfn, end_pfn; | ||
708 | unsigned long pos; | ||
709 | |||
710 | struct map_range mr[NR_RANGE_MR]; | ||
711 | int nr_range, i; | ||
712 | int use_pse, use_gbpages; | ||
713 | |||
714 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); | ||
715 | |||
716 | /* | ||
717 | * Find space for the kernel direct mapping tables. | ||
718 | * | ||
719 | * Later we should allocate these tables in the local node of the | ||
720 | * memory mapped. Unfortunately this is done currently before the | ||
721 | * nodes are discovered. | ||
722 | */ | ||
723 | if (!after_bootmem) | ||
724 | init_gbpages(); | ||
725 | |||
726 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
727 | /* | ||
728 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
729 | * This will simplify cpa(), which otherwise needs to support splitting | ||
730 | * large pages into small in interrupt context, etc. | ||
731 | */ | ||
732 | use_pse = use_gbpages = 0; | ||
733 | #else | ||
734 | use_pse = cpu_has_pse; | ||
735 | use_gbpages = direct_gbpages; | ||
736 | #endif | ||
737 | |||
738 | if (use_gbpages) | ||
739 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
740 | if (use_pse) | ||
741 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
742 | |||
743 | memset(mr, 0, sizeof(mr)); | ||
744 | nr_range = 0; | ||
745 | |||
746 | /* head if not big page alignment ?*/ | ||
747 | start_pfn = start >> PAGE_SHIFT; | ||
748 | pos = start_pfn << PAGE_SHIFT; | ||
749 | end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) | ||
750 | << (PMD_SHIFT - PAGE_SHIFT); | ||
751 | if (end_pfn > (end >> PAGE_SHIFT)) | ||
752 | end_pfn = end >> PAGE_SHIFT; | ||
753 | if (start_pfn < end_pfn) { | ||
754 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
755 | pos = end_pfn << PAGE_SHIFT; | ||
756 | } | ||
757 | |||
758 | /* big page (2M) range*/ | ||
759 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
760 | << (PMD_SHIFT - PAGE_SHIFT); | ||
761 | end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
762 | << (PUD_SHIFT - PAGE_SHIFT); | ||
763 | if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) | ||
764 | end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); | ||
765 | if (start_pfn < end_pfn) { | ||
766 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
767 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
768 | pos = end_pfn << PAGE_SHIFT; | ||
769 | } | ||
770 | |||
771 | /* big page (1G) range */ | ||
772 | start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
773 | << (PUD_SHIFT - PAGE_SHIFT); | ||
774 | end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
775 | if (start_pfn < end_pfn) { | ||
776 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
777 | page_size_mask & | ||
778 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | ||
779 | pos = end_pfn << PAGE_SHIFT; | ||
780 | } | ||
781 | |||
782 | /* tail is not big page (1G) alignment */ | ||
783 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
784 | << (PMD_SHIFT - PAGE_SHIFT); | ||
785 | end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
786 | if (start_pfn < end_pfn) { | ||
787 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
788 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
789 | pos = end_pfn << PAGE_SHIFT; | ||
790 | } | ||
791 | |||
792 | /* tail is not big page (2M) alignment */ | ||
793 | start_pfn = pos>>PAGE_SHIFT; | ||
794 | end_pfn = end>>PAGE_SHIFT; | ||
795 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
796 | |||
797 | /* try to merge same page size and continuous */ | ||
798 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | ||
799 | unsigned long old_start; | ||
800 | if (mr[i].end != mr[i+1].start || | ||
801 | mr[i].page_size_mask != mr[i+1].page_size_mask) | ||
802 | continue; | ||
803 | /* move it */ | ||
804 | old_start = mr[i].start; | ||
805 | memmove(&mr[i], &mr[i+1], | ||
806 | (nr_range - 1 - i) * sizeof (struct map_range)); | ||
807 | mr[i--].start = old_start; | ||
808 | nr_range--; | ||
809 | } | ||
810 | |||
811 | for (i = 0; i < nr_range; i++) | ||
812 | printk(KERN_DEBUG " %010lx - %010lx page %s\n", | ||
813 | mr[i].start, mr[i].end, | ||
814 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | ||
815 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | ||
816 | |||
817 | if (!after_bootmem) | ||
818 | find_early_table_space(end, use_pse, use_gbpages); | ||
819 | |||
820 | for (i = 0; i < nr_range; i++) | ||
821 | last_map_addr = kernel_physical_mapping_init( | ||
822 | mr[i].start, mr[i].end, | ||
823 | mr[i].page_size_mask); | ||
824 | |||
825 | if (!after_bootmem) | ||
826 | mmu_cr4_features = read_cr4(); | ||
827 | __flush_tlb_all(); | ||
828 | |||
829 | if (!after_bootmem && table_end > table_start) | ||
830 | reserve_early(table_start << PAGE_SHIFT, | ||
831 | table_end << PAGE_SHIFT, "PGTABLE"); | ||
832 | |||
833 | printk(KERN_INFO "last_map_addr: %lx end: %lx\n", | ||
834 | last_map_addr, end); | ||
835 | |||
836 | if (!after_bootmem) | ||
837 | early_memtest(start, end); | ||
838 | |||
839 | return last_map_addr >> PAGE_SHIFT; | ||
840 | } | ||
841 | |||
842 | #ifndef CONFIG_NUMA | 613 | #ifndef CONFIG_NUMA |
843 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 614 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) |
844 | { | 615 | { |
@@ -910,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | |||
910 | 681 | ||
911 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 682 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
912 | 683 | ||
913 | /* | ||
914 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
915 | * is valid. The argument is a physical page number. | ||
916 | * | ||
917 | * | ||
918 | * On x86, access has to be given to the first megabyte of ram because that area | ||
919 | * contains bios code and data regions used by X and dosemu and similar apps. | ||
920 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | ||
921 | * mmio resources as well as potential bios/acpi data regions. | ||
922 | */ | ||
923 | int devmem_is_allowed(unsigned long pagenr) | ||
924 | { | ||
925 | if (pagenr <= 256) | ||
926 | return 1; | ||
927 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
928 | return 0; | ||
929 | if (!page_is_ram(pagenr)) | ||
930 | return 1; | ||
931 | return 0; | ||
932 | } | ||
933 | |||
934 | |||
935 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, | 684 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, |
936 | kcore_modules, kcore_vsyscall; | 685 | kcore_modules, kcore_vsyscall; |
937 | 686 | ||
@@ -1019,13 +768,6 @@ void mark_rodata_ro(void) | |||
1019 | 768 | ||
1020 | #endif | 769 | #endif |
1021 | 770 | ||
1022 | #ifdef CONFIG_BLK_DEV_INITRD | ||
1023 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
1024 | { | ||
1025 | free_init_pages("initrd memory", start, end); | ||
1026 | } | ||
1027 | #endif | ||
1028 | |||
1029 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | 771 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, |
1030 | int flags) | 772 | int flags) |
1031 | { | 773 | { |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 433f7bd4648a..aca924a30ee6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -38,8 +38,7 @@ unsigned long __phys_addr(unsigned long x) | |||
38 | } else { | 38 | } else { |
39 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 39 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); |
40 | x -= PAGE_OFFSET; | 40 | x -= PAGE_OFFSET; |
41 | VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : | 41 | VIRTUAL_BUG_ON(!phys_addr_valid(x)); |
42 | !phys_addr_valid(x)); | ||
43 | } | 42 | } |
44 | return x; | 43 | return x; |
45 | } | 44 | } |
@@ -56,10 +55,8 @@ bool __virt_addr_valid(unsigned long x) | |||
56 | if (x < PAGE_OFFSET) | 55 | if (x < PAGE_OFFSET) |
57 | return false; | 56 | return false; |
58 | x -= PAGE_OFFSET; | 57 | x -= PAGE_OFFSET; |
59 | if (system_state == SYSTEM_BOOTING ? | 58 | if (!phys_addr_valid(x)) |
60 | x > MAXMEM : !phys_addr_valid(x)) { | ||
61 | return false; | 59 | return false; |
62 | } | ||
63 | } | 60 | } |
64 | 61 | ||
65 | return pfn_valid(x >> PAGE_SHIFT); | 62 | return pfn_valid(x >> PAGE_SHIFT); |
@@ -76,10 +73,9 @@ static inline int phys_addr_valid(unsigned long addr) | |||
76 | #ifdef CONFIG_DEBUG_VIRTUAL | 73 | #ifdef CONFIG_DEBUG_VIRTUAL |
77 | unsigned long __phys_addr(unsigned long x) | 74 | unsigned long __phys_addr(unsigned long x) |
78 | { | 75 | { |
79 | /* VMALLOC_* aren't constants; not available at the boot time */ | 76 | /* VMALLOC_* aren't constants */ |
80 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 77 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); |
81 | VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && | 78 | VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); |
82 | is_vmalloc_addr((void *) x)); | ||
83 | return x - PAGE_OFFSET; | 79 | return x - PAGE_OFFSET; |
84 | } | 80 | } |
85 | EXPORT_SYMBOL(__phys_addr); | 81 | EXPORT_SYMBOL(__phys_addr); |
@@ -89,7 +85,9 @@ bool __virt_addr_valid(unsigned long x) | |||
89 | { | 85 | { |
90 | if (x < PAGE_OFFSET) | 86 | if (x < PAGE_OFFSET) |
91 | return false; | 87 | return false; |
92 | if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) | 88 | if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) |
89 | return false; | ||
90 | if (x >= FIXADDR_START) | ||
93 | return false; | 91 | return false; |
94 | return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); | 92 | return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); |
95 | } | 93 | } |
@@ -508,13 +506,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) | |||
508 | return &bm_pte[pte_index(addr)]; | 506 | return &bm_pte[pte_index(addr)]; |
509 | } | 507 | } |
510 | 508 | ||
509 | static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; | ||
510 | |||
511 | void __init early_ioremap_init(void) | 511 | void __init early_ioremap_init(void) |
512 | { | 512 | { |
513 | pmd_t *pmd; | 513 | pmd_t *pmd; |
514 | int i; | ||
514 | 515 | ||
515 | if (early_ioremap_debug) | 516 | if (early_ioremap_debug) |
516 | printk(KERN_INFO "early_ioremap_init()\n"); | 517 | printk(KERN_INFO "early_ioremap_init()\n"); |
517 | 518 | ||
519 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) | ||
520 | slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); | ||
521 | |||
518 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); | 522 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); |
519 | memset(bm_pte, 0, sizeof(bm_pte)); | 523 | memset(bm_pte, 0, sizeof(bm_pte)); |
520 | pmd_populate_kernel(&init_mm, pmd, bm_pte); | 524 | pmd_populate_kernel(&init_mm, pmd, bm_pte); |
@@ -581,6 +585,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) | |||
581 | 585 | ||
582 | static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; | 586 | static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; |
583 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; | 587 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; |
588 | |||
584 | static int __init check_early_ioremap_leak(void) | 589 | static int __init check_early_ioremap_leak(void) |
585 | { | 590 | { |
586 | int count = 0; | 591 | int count = 0; |
@@ -602,7 +607,8 @@ static int __init check_early_ioremap_leak(void) | |||
602 | } | 607 | } |
603 | late_initcall(check_early_ioremap_leak); | 608 | late_initcall(check_early_ioremap_leak); |
604 | 609 | ||
605 | static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) | 610 | static void __init __iomem * |
611 | __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) | ||
606 | { | 612 | { |
607 | unsigned long offset, last_addr; | 613 | unsigned long offset, last_addr; |
608 | unsigned int nrpages; | 614 | unsigned int nrpages; |
@@ -668,9 +674,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo | |||
668 | --nrpages; | 674 | --nrpages; |
669 | } | 675 | } |
670 | if (early_ioremap_debug) | 676 | if (early_ioremap_debug) |
671 | printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); | 677 | printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); |
672 | 678 | ||
673 | prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); | 679 | prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); |
674 | return prev_map[slot]; | 680 | return prev_map[slot]; |
675 | } | 681 | } |
676 | 682 | ||
@@ -738,8 +744,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) | |||
738 | } | 744 | } |
739 | prev_map[slot] = NULL; | 745 | prev_map[slot] = NULL; |
740 | } | 746 | } |
741 | |||
742 | void __this_fixmap_does_not_exist(void) | ||
743 | { | ||
744 | WARN_ON(1); | ||
745 | } | ||
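In ioremap.c, besides replacing the system_state == SYSTEM_BOOTING checks with the new __vmalloc_start_set flag from init_32.c, the patch precomputes the base virtual address of every early-ioremap slot into slot_virt[] once in early_ioremap_init(), so the mapping and debug paths use a simple table lookup instead of recomputing fix_to_virt(idx0) at map time. A rough user-space model of that precomputation follows; FIXADDR_TOP, the fixmap index and the slot geometry are invented values, not the real ones:

/*
 * Rough model of the new slot_virt[] table: each early-ioremap slot
 * gets a fixed virtual base, computed once at init time.  All the
 * constants below are invented for illustration.
 */
#include <stdio.h>

#define PAGE_SHIFT	 12
#define FIXADDR_TOP	 0xfffff000UL	/* hypothetical */
#define NR_FIX_BTMAPS	 64		/* pages per slot */
#define FIX_BTMAPS_SLOTS 4
#define FIX_BTMAP_BEGIN	 447		/* hypothetical fixmap index */

/* fixmap indices count down from the top of the fixmap area */
static unsigned long fix_to_virt(unsigned int idx)
{
	return FIXADDR_TOP - ((unsigned long)idx << PAGE_SHIFT);
}

static unsigned long slot_virt[FIX_BTMAPS_SLOTS];

int main(void)
{
	int i;

	for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
		slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS * i);

	for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
		printf("early ioremap slot %d: base %#lx, %d pages\n",
		       i, slot_virt[i], NR_FIX_BTMAPS);
	return 0;
}

In the kernel the same per-slot bases then feed straight into prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]) in __early_ioremap(), as seen in the hunk above.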
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 93d82038af4b..6a518dd08a36 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,11 +32,14 @@ struct kmmio_fault_page { | |||
32 | struct list_head list; | 32 | struct list_head list; |
33 | struct kmmio_fault_page *release_next; | 33 | struct kmmio_fault_page *release_next; |
34 | unsigned long page; /* location of the fault page */ | 34 | unsigned long page; /* location of the fault page */ |
35 | bool old_presence; /* page presence prior to arming */ | ||
36 | bool armed; | ||
35 | 37 | ||
36 | /* | 38 | /* |
37 | * Number of times this page has been registered as a part | 39 | * Number of times this page has been registered as a part |
38 | * of a probe. If zero, page is disarmed and this may be freed. | 40 | * of a probe. If zero, page is disarmed and this may be freed. |
39 | * Used only by writers (RCU). | 41 | * Used only by writers (RCU) and post_kmmio_handler(). |
42 | * Protected by kmmio_lock, when linked into kmmio_page_table. | ||
40 | */ | 43 | */ |
41 | int count; | 44 | int count; |
42 | }; | 45 | }; |
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) | |||
105 | return NULL; | 108 | return NULL; |
106 | } | 109 | } |
107 | 110 | ||
108 | static void set_page_present(unsigned long addr, bool present, | 111 | static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) |
109 | unsigned int *pglevel) | 112 | { |
113 | pmdval_t v = pmd_val(*pmd); | ||
114 | *old = !!(v & _PAGE_PRESENT); | ||
115 | v &= ~_PAGE_PRESENT; | ||
116 | if (present) | ||
117 | v |= _PAGE_PRESENT; | ||
118 | set_pmd(pmd, __pmd(v)); | ||
119 | } | ||
120 | |||
121 | static void set_pte_presence(pte_t *pte, bool present, bool *old) | ||
122 | { | ||
123 | pteval_t v = pte_val(*pte); | ||
124 | *old = !!(v & _PAGE_PRESENT); | ||
125 | v &= ~_PAGE_PRESENT; | ||
126 | if (present) | ||
127 | v |= _PAGE_PRESENT; | ||
128 | set_pte_atomic(pte, __pte(v)); | ||
129 | } | ||
130 | |||
131 | static int set_page_presence(unsigned long addr, bool present, bool *old) | ||
110 | { | 132 | { |
111 | pteval_t pteval; | ||
112 | pmdval_t pmdval; | ||
113 | unsigned int level; | 133 | unsigned int level; |
114 | pmd_t *pmd; | ||
115 | pte_t *pte = lookup_address(addr, &level); | 134 | pte_t *pte = lookup_address(addr, &level); |
116 | 135 | ||
117 | if (!pte) { | 136 | if (!pte) { |
118 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); | 137 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); |
119 | return; | 138 | return -1; |
120 | } | 139 | } |
121 | 140 | ||
122 | if (pglevel) | ||
123 | *pglevel = level; | ||
124 | |||
125 | switch (level) { | 141 | switch (level) { |
126 | case PG_LEVEL_2M: | 142 | case PG_LEVEL_2M: |
127 | pmd = (pmd_t *)pte; | 143 | set_pmd_presence((pmd_t *)pte, present, old); |
128 | pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; | ||
129 | if (present) | ||
130 | pmdval |= _PAGE_PRESENT; | ||
131 | set_pmd(pmd, __pmd(pmdval)); | ||
132 | break; | 144 | break; |
133 | |||
134 | case PG_LEVEL_4K: | 145 | case PG_LEVEL_4K: |
135 | pteval = pte_val(*pte) & ~_PAGE_PRESENT; | 146 | set_pte_presence(pte, present, old); |
136 | if (present) | ||
137 | pteval |= _PAGE_PRESENT; | ||
138 | set_pte_atomic(pte, __pte(pteval)); | ||
139 | break; | 147 | break; |
140 | |||
141 | default: | 148 | default: |
142 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | 149 | pr_err("kmmio: unexpected page level 0x%x.\n", level); |
143 | return; | 150 | return -1; |
144 | } | 151 | } |
145 | 152 | ||
146 | __flush_tlb_one(addr); | 153 | __flush_tlb_one(addr); |
154 | return 0; | ||
147 | } | 155 | } |
148 | 156 | ||
149 | /** Mark the given page as not present. Access to it will trigger a fault. */ | 157 | /* |
150 | static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | 158 | * Mark the given page as not present. Access to it will trigger a fault. |
159 | * | ||
160 | * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the | ||
161 | * protection is ignored here. RCU read lock is assumed held, so the struct | ||
162 | * will not disappear unexpectedly. Furthermore, the caller must guarantee, | ||
163 | * that double arming the same virtual address (page) cannot occur. | ||
164 | * | ||
165 | * Double disarming on the other hand is allowed, and may occur when a fault | ||
166 | * and mmiotrace shutdown happen simultaneously. | ||
167 | */ | ||
168 | static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | ||
151 | { | 169 | { |
152 | set_page_present(page & PAGE_MASK, false, pglevel); | 170 | int ret; |
171 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); | ||
172 | if (f->armed) { | ||
173 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", | ||
174 | f->page, f->count, f->old_presence); | ||
175 | } | ||
176 | ret = set_page_presence(f->page, false, &f->old_presence); | ||
177 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); | ||
178 | f->armed = true; | ||
179 | return ret; | ||
153 | } | 180 | } |
154 | 181 | ||
155 | /** Mark the given page as present. */ | 182 | /** Restore the given page to saved presence state. */ |
156 | static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | 183 | static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) |
157 | { | 184 | { |
158 | set_page_present(page & PAGE_MASK, true, pglevel); | 185 | bool tmp; |
186 | int ret = set_page_presence(f->page, f->old_presence, &tmp); | ||
187 | WARN_ONCE(ret < 0, | ||
188 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); | ||
189 | f->armed = false; | ||
159 | } | 190 | } |
160 | 191 | ||
161 | /* | 192 | /* |
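The arm/disarm helpers above lean on two per-page fields introduced by this patch, old_presence and armed, alongside the count member documented at the top of this file's diff. A minimal sketch of struct kmmio_fault_page as implied by the code here (only the referenced members are shown; their exact order and any surrounding fields are assumptions):

	struct kmmio_fault_page {
		struct list_head list;			/* hashed into kmmio_page_table, RCU-protected */
		struct kmmio_fault_page *release_next;	/* chains entries queued for delayed release */
		unsigned long page;			/* page-aligned virtual address */
		bool old_presence;			/* presence bit saved when the page was armed */
		bool armed;				/* set while armed, to catch double arming */
		int count;				/* number of probes registered on this page */
	};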
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
202 | 233 | ||
203 | ctx = &get_cpu_var(kmmio_ctx); | 234 | ctx = &get_cpu_var(kmmio_ctx); |
204 | if (ctx->active) { | 235 | if (ctx->active) { |
205 | disarm_kmmio_fault_page(faultpage->page, NULL); | ||
206 | if (addr == ctx->addr) { | 236 | if (addr == ctx->addr) { |
207 | /* | 237 | /* |
208 | * On SMP we sometimes get recursive probe hits on the | 238 | * A second fault on the same page means some other |
209 | * same address. Context is already saved, fall out. | 239 | * condition needs handling by do_page_fault(); the |
240 | * page really not being present is the most common. | ||
210 | */ | 241 | */ |
211 | pr_debug("kmmio: duplicate probe hit on CPU %d, for " | 242 | pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", |
212 | "address 0x%08lx.\n", | 243 | addr, smp_processor_id()); |
213 | smp_processor_id(), addr); | 244 | |
214 | ret = 1; | 245 | if (!faultpage->old_presence) |
215 | goto no_kmmio_ctx; | 246 | pr_info("kmmio: unexpected secondary hit for " |
216 | } | 247 | "address 0x%08lx on CPU %d.\n", addr, |
217 | /* | 248 | smp_processor_id()); |
218 | * Prevent overwriting already in-flight context. | 249 | } else { |
219 | * This should not happen, let's hope disarming at least | 250 | /* |
220 | * prevents a panic. | 251 | * Prevent overwriting already in-flight context. |
221 | */ | 252 | * This should not happen, let's hope disarming at |
222 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | 253 | * least prevents a panic. |
254 | */ | ||
255 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | ||
223 | "for address 0x%08lx. Ignoring.\n", | 256 | "for address 0x%08lx. Ignoring.\n", |
224 | smp_processor_id(), addr); | 257 | smp_processor_id(), addr); |
225 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", | 258 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", |
226 | ctx->addr); | 259 | ctx->addr); |
260 | disarm_kmmio_fault_page(faultpage); | ||
261 | } | ||
227 | goto no_kmmio_ctx; | 262 | goto no_kmmio_ctx; |
228 | } | 263 | } |
229 | ctx->active++; | 264 | ctx->active++; |
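Condensed, the reworked ctx->active branch above behaves as follows (a sketch of the control flow, not a verbatim copy of the handler):

	if (ctx->active) {
		if (addr == ctx->addr) {
			/* second fault on the page being single-stepped:
			 * hand it back to do_page_fault(); only complain if
			 * the page was genuinely present before arming */
		} else {
			/* fault on a different armed page: disarm it rather
			 * than overwrite the in-flight context */
			disarm_kmmio_fault_page(faultpage);
		}
		goto no_kmmio_ctx;
	}

The key difference from the old code is that the unconditional disarm at the top of the branch is gone: a same-address secondary hit is left to do_page_fault(), while a hit on a different armed page still gets disarmed before bailing out.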
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
244 | regs->flags &= ~X86_EFLAGS_IF; | 279 | regs->flags &= ~X86_EFLAGS_IF; |
245 | 280 | ||
246 | /* Now we set present bit in PTE and single step. */ | 281 | /* Now we set present bit in PTE and single step. */ |
247 | disarm_kmmio_fault_page(ctx->fpage->page, NULL); | 282 | disarm_kmmio_fault_page(ctx->fpage); |
248 | 283 | ||
249 | /* | 284 | /* |
250 | * If another cpu accesses the same page while we are stepping, | 285 | * If another cpu accesses the same page while we are stepping, |
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
275 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); | 310 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); |
276 | 311 | ||
277 | if (!ctx->active) { | 312 | if (!ctx->active) { |
278 | pr_debug("kmmio: spurious debug trap on CPU %d.\n", | 313 | pr_warning("kmmio: spurious debug trap on CPU %d.\n", |
279 | smp_processor_id()); | 314 | smp_processor_id()); |
280 | goto out; | 315 | goto out; |
281 | } | 316 | } |
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
283 | if (ctx->probe && ctx->probe->post_handler) | 318 | if (ctx->probe && ctx->probe->post_handler) |
284 | ctx->probe->post_handler(ctx->probe, condition, regs); | 319 | ctx->probe->post_handler(ctx->probe, condition, regs); |
285 | 320 | ||
286 | arm_kmmio_fault_page(ctx->fpage->page, NULL); | 321 | /* Prevent racing against release_kmmio_fault_page(). */ |
322 | spin_lock(&kmmio_lock); | ||
323 | if (ctx->fpage->count) | ||
324 | arm_kmmio_fault_page(ctx->fpage); | ||
325 | spin_unlock(&kmmio_lock); | ||
287 | 326 | ||
288 | regs->flags &= ~X86_EFLAGS_TF; | 327 | regs->flags &= ~X86_EFLAGS_TF; |
289 | regs->flags |= ctx->saved_flags; | 328 | regs->flags |= ctx->saved_flags; |
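The new kmmio_lock section pairs with release_kmmio_fault_page() in the hunk below, which decrements count and disarms the page once it reaches zero (its callers presumably already hold kmmio_lock). Checking count under the same lock keeps the post handler from re-arming a page that is concurrently being released; a condensed sketch of the two sides:

	/* post_kmmio_handler(): re-arm only if still referenced */
	spin_lock(&kmmio_lock);
	if (ctx->fpage->count)
		arm_kmmio_fault_page(ctx->fpage);
	spin_unlock(&kmmio_lock);

	/* release_kmmio_fault_page(): drop a reference, disarm at zero */
	f->count--;
	BUG_ON(f->count < 0);
	if (!f->count) {
		disarm_kmmio_fault_page(f);
		f->release_next = *release_list;
		*release_list = f;
	}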
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page) | |||
315 | f = get_kmmio_fault_page(page); | 354 | f = get_kmmio_fault_page(page); |
316 | if (f) { | 355 | if (f) { |
317 | if (!f->count) | 356 | if (!f->count) |
318 | arm_kmmio_fault_page(f->page, NULL); | 357 | arm_kmmio_fault_page(f); |
319 | f->count++; | 358 | f->count++; |
320 | return 0; | 359 | return 0; |
321 | } | 360 | } |
322 | 361 | ||
323 | f = kmalloc(sizeof(*f), GFP_ATOMIC); | 362 | f = kzalloc(sizeof(*f), GFP_ATOMIC); |
324 | if (!f) | 363 | if (!f) |
325 | return -1; | 364 | return -1; |
326 | 365 | ||
327 | f->count = 1; | 366 | f->count = 1; |
328 | f->page = page; | 367 | f->page = page; |
329 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | ||
330 | 368 | ||
331 | arm_kmmio_fault_page(f->page, NULL); | 369 | if (arm_kmmio_fault_page(f)) { |
370 | kfree(f); | ||
371 | return -1; | ||
372 | } | ||
373 | |||
374 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | ||
332 | 375 | ||
333 | return 0; | 376 | return 0; |
334 | } | 377 | } |
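Two details change in add_kmmio_fault_page() above: the entry is now allocated with kzalloc(), so the new armed and old_presence fields start out cleared, and arming happens before the entry is published with list_add_rcu(). Roughly:

	f = kzalloc(sizeof(*f), GFP_ATOMIC);	/* armed = false, old_presence = false */
	if (!f)
		return -1;
	f->count = 1;
	f->page = page;
	if (arm_kmmio_fault_page(f)) {		/* arm first ... */
		kfree(f);			/* ... so a failure can still free f */
		return -1;
	}
	list_add_rcu(&f->list, kmmio_page_list(f->page));	/* publish last */

If set_page_presence() fails (no pte, or an unexpected page level), the entry is simply freed instead of being left on the list unarmed.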
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page, | |||
347 | f->count--; | 390 | f->count--; |
348 | BUG_ON(f->count < 0); | 391 | BUG_ON(f->count < 0); |
349 | if (!f->count) { | 392 | if (!f->count) { |
350 | disarm_kmmio_fault_page(f->page, NULL); | 393 | disarm_kmmio_fault_page(f); |
351 | f->release_next = *release_list; | 394 | f->release_next = *release_list; |
352 | *release_list = f; | 395 | *release_list = f; |
353 | } | 396 | } |
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) | |||
408 | 451 | ||
409 | static void remove_kmmio_fault_pages(struct rcu_head *head) | 452 | static void remove_kmmio_fault_pages(struct rcu_head *head) |
410 | { | 453 | { |
411 | struct kmmio_delayed_release *dr = container_of( | 454 | struct kmmio_delayed_release *dr = |
412 | head, | 455 | container_of(head, struct kmmio_delayed_release, rcu); |
413 | struct kmmio_delayed_release, | ||
414 | rcu); | ||
415 | struct kmmio_fault_page *p = dr->release_list; | 456 | struct kmmio_fault_page *p = dr->release_list; |
416 | struct kmmio_fault_page **prevp = &dr->release_list; | 457 | struct kmmio_fault_page **prevp = &dr->release_list; |
417 | unsigned long flags; | 458 | unsigned long flags; |
459 | |||
418 | spin_lock_irqsave(&kmmio_lock, flags); | 460 | spin_lock_irqsave(&kmmio_lock, flags); |
419 | while (p) { | 461 | while (p) { |
420 | if (!p->count) | 462 | if (!p->count) { |
421 | list_del_rcu(&p->list); | 463 | list_del_rcu(&p->list); |
422 | else | 464 | prevp = &p->release_next; |
465 | } else { | ||
423 | *prevp = p->release_next; | 466 | *prevp = p->release_next; |
424 | prevp = &p->release_next; | 467 | } |
425 | p = p->release_next; | 468 | p = p->release_next; |
426 | } | 469 | } |
427 | spin_unlock_irqrestore(&kmmio_lock, flags); | 470 | spin_unlock_irqrestore(&kmmio_lock, flags); |
471 | |||
428 | /* This is the real RCU destroy call. */ | 472 | /* This is the real RCU destroy call. */ |
429 | call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); | 473 | call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); |
430 | } | 474 | } |
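The rewritten loop in remove_kmmio_fault_pages() straightens out the bookkeeping of the delayed-release list: pages whose count is still zero are unlinked from the RCU hash list and stay queued for the final free, while pages that were re-registered in the meantime are spliced out of the release list and left alone. Annotated:

	while (p) {
		if (!p->count) {
			list_del_rcu(&p->list);		/* really going away: drop from the hash list */
			prevp = &p->release_next;	/* keep it on release_list for rcu_free_kmmio_fault_pages() */
		} else {
			*prevp = p->release_next;	/* re-registered meanwhile: skip it on release_list */
		}
		p = p->release_next;
	}

The old version advanced prevp even for entries it had just spliced out, so the release list could be left mis-linked whenever a page had gained a new reference while waiting for release.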
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 0bcd7883d036..605c8be06217 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg) | |||
100 | { | 100 | { |
101 | if (arg) | 101 | if (arg) |
102 | memtest_pattern = simple_strtoul(arg, NULL, 0); | 102 | memtest_pattern = simple_strtoul(arg, NULL, 0); |
103 | else | ||
104 | memtest_pattern = ARRAY_SIZE(patterns); | ||
105 | |||
103 | return 0; | 106 | return 0; |
104 | } | 107 | } |
105 | 108 | ||
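With the new else branch, a bare "memtest" on the kernel command line defaults memtest_pattern to the number of entries in the built-in patterns[] array rather than leaving it unset; the early test loop presumably treats it as a pass count, cycling through the table along the lines of:

	for (i = 0; i < memtest_pattern; i++)
		memtest(patterns[i % ARRAY_SIZE(patterns)], start, end);

so "memtest" now means one pass with every pattern, while "memtest=N" still selects exactly N passes.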
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 451fe95a0352..3daefa04ace5 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn, | |||
416 | for_each_online_node(nid) | 416 | for_each_online_node(nid) |
417 | propagate_e820_map_node(nid); | 417 | propagate_e820_map_node(nid); |
418 | 418 | ||
419 | for_each_online_node(nid) | 419 | for_each_online_node(nid) { |
420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
421 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | ||
422 | } | ||
421 | 423 | ||
422 | NODE_DATA(0)->bdata = &bootmem_node_data[0]; | ||
423 | setup_bootmem_allocator(); | 424 | setup_bootmem_allocator(); |
424 | } | 425 | } |
425 | 426 | ||
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index ab50a8d7402c..427fd1b56df5 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Written by Pekka Paalanen, 2008 <pq@iki.fi> | 2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> |
3 | */ | 3 | */ |
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/io.h> | 5 | #include <linux/io.h> |
@@ -9,35 +9,74 @@ | |||
9 | 9 | ||
10 | static unsigned long mmio_address; | 10 | static unsigned long mmio_address; |
11 | module_param(mmio_address, ulong, 0); | 11 | module_param(mmio_address, ulong, 0); |
12 | MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); | 12 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " |
13 | "(or 8 MB if read_far is non-zero)."); | ||
14 | |||
15 | static unsigned long read_far = 0x400100; | ||
16 | module_param(read_far, ulong, 0); | ||
17 | MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB " | ||
18 | "(default: 0x400100)."); | ||
19 | |||
20 | static unsigned v16(unsigned i) | ||
21 | { | ||
22 | return i * 12 + 7; | ||
23 | } | ||
24 | |||
25 | static unsigned v32(unsigned i) | ||
26 | { | ||
27 | return i * 212371 + 13; | ||
28 | } | ||
13 | 29 | ||
14 | static void do_write_test(void __iomem *p) | 30 | static void do_write_test(void __iomem *p) |
15 | { | 31 | { |
16 | unsigned int i; | 32 | unsigned int i; |
33 | pr_info(MODULE_NAME ": write test.\n"); | ||
17 | mmiotrace_printk("Write test.\n"); | 34 | mmiotrace_printk("Write test.\n"); |
35 | |||
18 | for (i = 0; i < 256; i++) | 36 | for (i = 0; i < 256; i++) |
19 | iowrite8(i, p + i); | 37 | iowrite8(i, p + i); |
38 | |||
20 | for (i = 1024; i < (5 * 1024); i += 2) | 39 | for (i = 1024; i < (5 * 1024); i += 2) |
21 | iowrite16(i * 12 + 7, p + i); | 40 | iowrite16(v16(i), p + i); |
41 | |||
22 | for (i = (5 * 1024); i < (16 * 1024); i += 4) | 42 | for (i = (5 * 1024); i < (16 * 1024); i += 4) |
23 | iowrite32(i * 212371 + 13, p + i); | 43 | iowrite32(v32(i), p + i); |
24 | } | 44 | } |
25 | 45 | ||
26 | static void do_read_test(void __iomem *p) | 46 | static void do_read_test(void __iomem *p) |
27 | { | 47 | { |
28 | unsigned int i; | 48 | unsigned int i; |
49 | unsigned errs[3] = { 0 }; | ||
50 | pr_info(MODULE_NAME ": read test.\n"); | ||
29 | mmiotrace_printk("Read test.\n"); | 51 | mmiotrace_printk("Read test.\n"); |
52 | |||
30 | for (i = 0; i < 256; i++) | 53 | for (i = 0; i < 256; i++) |
31 | ioread8(p + i); | 54 | if (ioread8(p + i) != i) |
55 | ++errs[0]; | ||
56 | |||
32 | for (i = 1024; i < (5 * 1024); i += 2) | 57 | for (i = 1024; i < (5 * 1024); i += 2) |
33 | ioread16(p + i); | 58 | if (ioread16(p + i) != v16(i)) |
59 | ++errs[1]; | ||
60 | |||
34 | for (i = (5 * 1024); i < (16 * 1024); i += 4) | 61 | for (i = (5 * 1024); i < (16 * 1024); i += 4) |
35 | ioread32(p + i); | 62 | if (ioread32(p + i) != v32(i)) |
63 | ++errs[2]; | ||
64 | |||
65 | mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n", | ||
66 | errs[0], errs[1], errs[2]); | ||
36 | } | 67 | } |
37 | 68 | ||
38 | static void do_test(void) | 69 | static void do_read_far_test(void __iomem *p) |
39 | { | 70 | { |
40 | void __iomem *p = ioremap_nocache(mmio_address, 0x4000); | 71 | pr_info(MODULE_NAME ": read far test.\n"); |
72 | mmiotrace_printk("Read far test.\n"); | ||
73 | |||
74 | ioread32(p + read_far); | ||
75 | } | ||
76 | |||
77 | static void do_test(unsigned long size) | ||
78 | { | ||
79 | void __iomem *p = ioremap_nocache(mmio_address, size); | ||
41 | if (!p) { | 80 | if (!p) { |
42 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); | 81 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); |
43 | return; | 82 | return; |
@@ -45,11 +84,15 @@ static void do_test(void) | |||
45 | mmiotrace_printk("ioremap returned %p.\n", p); | 84 | mmiotrace_printk("ioremap returned %p.\n", p); |
46 | do_write_test(p); | 85 | do_write_test(p); |
47 | do_read_test(p); | 86 | do_read_test(p); |
87 | if (read_far && read_far < size - 4) | ||
88 | do_read_far_test(p); | ||
48 | iounmap(p); | 89 | iounmap(p); |
49 | } | 90 | } |
50 | 91 | ||
51 | static int __init init(void) | 92 | static int __init init(void) |
52 | { | 93 | { |
94 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); | ||
95 | |||
53 | if (mmio_address == 0) { | 96 | if (mmio_address == 0) { |
54 | pr_err(MODULE_NAME ": you have to use the module argument " | 97 | pr_err(MODULE_NAME ": you have to use the module argument " |
55 | "mmio_address.\n"); | 98 | "mmio_address.\n"); |
@@ -58,10 +101,11 @@ static int __init init(void) | |||
58 | return -ENXIO; | 101 | return -ENXIO; |
59 | } | 102 | } |
60 | 103 | ||
61 | pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " | 104 | pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " |
62 | "in PCI address space, and writing " | 105 | "address space, and writing 16 kB of rubbish in there.\n", |
63 | "rubbish in there.\n", mmio_address); | 106 | size >> 10, mmio_address); |
64 | do_test(); | 107 | do_test(size); |
108 | pr_info(MODULE_NAME ": All done.\n"); | ||
65 | return 0; | 109 | return 0; |
66 | } | 110 | } |
67 | 111 | ||
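Taken together, a hedged usage sketch for the updated test module (the physical address below is only a placeholder; it must point at an otherwise unused MMIO range on the machine under test):

	modprobe testmmiotrace mmio_address=0xfe000000
	modprobe testmmiotrace mmio_address=0xfe000000 read_far=0

With the default non-zero read_far the module maps 8 MB and runs the write test, the read test that now verifies values via v16()/v32() and reports per-width error counts through mmiotrace_printk(), and finally a single 32-bit read at offset read_far; passing read_far=0 restores the old behaviour of a 16 kB mapping with no far read.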