Diffstat (limited to 'arch/x86/mm')
 arch/x86/mm/highmem_32.c    |   9
 arch/x86/mm/init.c          | 344
 arch/x86/mm/init_32.c       | 256
 arch/x86/mm/init_64.c       | 280
 arch/x86/mm/ioremap.c       |  35
 arch/x86/mm/kmmio.c         | 164
 arch/x86/mm/memtest.c       |   3
 arch/x86/mm/numa_32.c       |   5
 arch/x86/mm/testmmiotrace.c |  70
 9 files changed, 602 insertions(+), 564 deletions(-)
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 00f127c80b0e..d11745334a67 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(kunmap);
158EXPORT_SYMBOL(kmap_atomic); 158EXPORT_SYMBOL(kmap_atomic);
159EXPORT_SYMBOL(kunmap_atomic); 159EXPORT_SYMBOL(kunmap_atomic);
160 160
161#ifdef CONFIG_NUMA
162void __init set_highmem_pages_init(void) 161void __init set_highmem_pages_init(void)
163{ 162{
164 struct zone *zone; 163 struct zone *zone;
@@ -182,11 +181,3 @@ void __init set_highmem_pages_init(void)
182 } 181 }
183 totalram_pages += totalhigh_pages; 182 totalram_pages += totalhigh_pages;
184} 183}
185#else
186void __init set_highmem_pages_init(void)
187{
188 add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
189
190 totalram_pages += totalhigh_pages;
191}
192#endif /* CONFIG_NUMA */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ce6a722587d8..15219e0d1243 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,8 +1,345 @@
1#include <linux/ioport.h>
1#include <linux/swap.h> 2#include <linux/swap.h>
3
2#include <asm/cacheflush.h> 4#include <asm/cacheflush.h>
5#include <asm/e820.h>
6#include <asm/init.h>
3#include <asm/page.h> 7#include <asm/page.h>
8#include <asm/page_types.h>
4#include <asm/sections.h> 9#include <asm/sections.h>
5#include <asm/system.h> 10#include <asm/system.h>
11#include <asm/tlbflush.h>
12
13unsigned long __initdata e820_table_start;
14unsigned long __meminitdata e820_table_end;
15unsigned long __meminitdata e820_table_top;
16
17int after_bootmem;
18
19int direct_gbpages
20#ifdef CONFIG_DIRECT_GBPAGES
21 = 1
22#endif
23;
24
25static void __init find_early_table_space(unsigned long end, int use_pse,
26 int use_gbpages)
27{
28 unsigned long puds, pmds, ptes, tables, start;
29
30 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
31 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
32
33 if (use_gbpages) {
34 unsigned long extra;
35
36 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
37 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
38 } else
39 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
40
41 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
42
43 if (use_pse) {
44 unsigned long extra;
45
46 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
47#ifdef CONFIG_X86_32
48 extra += PMD_SIZE;
49#endif
50 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
51 } else
52 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
53
54 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
55
56#ifdef CONFIG_X86_32
57 /* for fixmap */
58 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
59#endif
60
61 /*
62 * RED-PEN putting page tables only on node 0 could
63 * cause a hotspot and fill up ZONE_DMA. The page tables
64 * need roughly 0.5KB per GB.
65 */
66#ifdef CONFIG_X86_32
67 start = 0x7000;
68 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
69 tables, PAGE_SIZE);
70#else /* CONFIG_X86_64 */
71 start = 0x8000;
72 e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
73#endif
74 if (e820_table_start == -1UL)
75 panic("Cannot find space for the kernel page tables");
76
77 e820_table_start >>= PAGE_SHIFT;
78 e820_table_end = e820_table_start;
79 e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
80
81 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
82 end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
83}
84
85struct map_range {
86 unsigned long start;
87 unsigned long end;
88 unsigned page_size_mask;
89};
90
91#ifdef CONFIG_X86_32
92#define NR_RANGE_MR 3
93#else /* CONFIG_X86_64 */
94#define NR_RANGE_MR 5
95#endif
96
97static int save_mr(struct map_range *mr, int nr_range,
98 unsigned long start_pfn, unsigned long end_pfn,
99 unsigned long page_size_mask)
100{
101 if (start_pfn < end_pfn) {
102 if (nr_range >= NR_RANGE_MR)
103 panic("run out of range for init_memory_mapping\n");
104 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
105 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
106 mr[nr_range].page_size_mask = page_size_mask;
107 nr_range++;
108 }
109
110 return nr_range;
111}
112
113#ifdef CONFIG_X86_64
114static void __init init_gbpages(void)
115{
116 if (direct_gbpages && cpu_has_gbpages)
117 printk(KERN_INFO "Using GB pages for direct mapping\n");
118 else
119 direct_gbpages = 0;
120}
121#else
122static inline void init_gbpages(void)
123{
124}
125#endif
126
127/*
128 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
129 * This runs before bootmem is initialized and gets pages directly from
130 * the physical memory. To access them they are temporarily mapped.
131 */
132unsigned long __init_refok init_memory_mapping(unsigned long start,
133 unsigned long end)
134{
135 unsigned long page_size_mask = 0;
136 unsigned long start_pfn, end_pfn;
137 unsigned long ret = 0;
138 unsigned long pos;
139
140 struct map_range mr[NR_RANGE_MR];
141 int nr_range, i;
142 int use_pse, use_gbpages;
143
144 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
145
146 if (!after_bootmem)
147 init_gbpages();
148
149#ifdef CONFIG_DEBUG_PAGEALLOC
150 /*
151 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
152 * This will simplify cpa(), which otherwise needs to support splitting
153 * large pages into small in interrupt context, etc.
154 */
155 use_pse = use_gbpages = 0;
156#else
157 use_pse = cpu_has_pse;
158 use_gbpages = direct_gbpages;
159#endif
160
161#ifdef CONFIG_X86_32
162#ifdef CONFIG_X86_PAE
163 set_nx();
164 if (nx_enabled)
165 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
166#endif
167
168 /* Enable PSE if available */
169 if (cpu_has_pse)
170 set_in_cr4(X86_CR4_PSE);
171
172 /* Enable PGE if available */
173 if (cpu_has_pge) {
174 set_in_cr4(X86_CR4_PGE);
175 __supported_pte_mask |= _PAGE_GLOBAL;
176 }
177#endif
178
179 if (use_gbpages)
180 page_size_mask |= 1 << PG_LEVEL_1G;
181 if (use_pse)
182 page_size_mask |= 1 << PG_LEVEL_2M;
183
184 memset(mr, 0, sizeof(mr));
185 nr_range = 0;
186
187 /* head if not big page alignment ? */
188 start_pfn = start >> PAGE_SHIFT;
189 pos = start_pfn << PAGE_SHIFT;
190#ifdef CONFIG_X86_32
191 /*
192 * Don't use a large page for the first 2/4MB of memory
193 * because there are often fixed size MTRRs in there
194 * and overlapping MTRRs into large pages can cause
195 * slowdowns.
196 */
197 if (pos == 0)
198 end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
199 else
200 end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
201 << (PMD_SHIFT - PAGE_SHIFT);
202#else /* CONFIG_X86_64 */
203 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
204 << (PMD_SHIFT - PAGE_SHIFT);
205#endif
206 if (end_pfn > (end >> PAGE_SHIFT))
207 end_pfn = end >> PAGE_SHIFT;
208 if (start_pfn < end_pfn) {
209 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
210 pos = end_pfn << PAGE_SHIFT;
211 }
212
213 /* big page (2M) range */
214 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
215 << (PMD_SHIFT - PAGE_SHIFT);
216#ifdef CONFIG_X86_32
217 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
218#else /* CONFIG_X86_64 */
219 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
220 << (PUD_SHIFT - PAGE_SHIFT);
221 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
222 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
223#endif
224
225 if (start_pfn < end_pfn) {
226 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
227 page_size_mask & (1<<PG_LEVEL_2M));
228 pos = end_pfn << PAGE_SHIFT;
229 }
230
231#ifdef CONFIG_X86_64
232 /* big page (1G) range */
233 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
234 << (PUD_SHIFT - PAGE_SHIFT);
235 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
236 if (start_pfn < end_pfn) {
237 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
238 page_size_mask &
239 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
240 pos = end_pfn << PAGE_SHIFT;
241 }
242
243 /* tail is not big page (1G) alignment */
244 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
245 << (PMD_SHIFT - PAGE_SHIFT);
246 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
247 if (start_pfn < end_pfn) {
248 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
249 page_size_mask & (1<<PG_LEVEL_2M));
250 pos = end_pfn << PAGE_SHIFT;
251 }
252#endif
253
254 /* tail is not big page (2M) alignment */
255 start_pfn = pos>>PAGE_SHIFT;
256 end_pfn = end>>PAGE_SHIFT;
257 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
258
259 /* try to merge same page size and continuous */
260 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
261 unsigned long old_start;
262 if (mr[i].end != mr[i+1].start ||
263 mr[i].page_size_mask != mr[i+1].page_size_mask)
264 continue;
265 /* move it */
266 old_start = mr[i].start;
267 memmove(&mr[i], &mr[i+1],
268 (nr_range - 1 - i) * sizeof(struct map_range));
269 mr[i--].start = old_start;
270 nr_range--;
271 }
272
273 for (i = 0; i < nr_range; i++)
274 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
275 mr[i].start, mr[i].end,
276 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
277 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
278
279 /*
280 * Find space for the kernel direct mapping tables.
281 *
282 * Later we should allocate these tables in the local node of the
283 * memory mapped. Unfortunately this is done currently before the
284 * nodes are discovered.
285 */
286 if (!after_bootmem)
287 find_early_table_space(end, use_pse, use_gbpages);
288
289#ifdef CONFIG_X86_32
290 for (i = 0; i < nr_range; i++)
291 kernel_physical_mapping_init(mr[i].start, mr[i].end,
292 mr[i].page_size_mask);
293 ret = end;
294#else /* CONFIG_X86_64 */
295 for (i = 0; i < nr_range; i++)
296 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
297 mr[i].page_size_mask);
298#endif
299
300#ifdef CONFIG_X86_32
301 early_ioremap_page_table_range_init();
302
303 load_cr3(swapper_pg_dir);
304#endif
305
306#ifdef CONFIG_X86_64
307 if (!after_bootmem)
308 mmu_cr4_features = read_cr4();
309#endif
310 __flush_tlb_all();
311
312 if (!after_bootmem && e820_table_end > e820_table_start)
313 reserve_early(e820_table_start << PAGE_SHIFT,
314 e820_table_end << PAGE_SHIFT, "PGTABLE");
315
316 if (!after_bootmem)
317 early_memtest(start, end);
318
319 return ret >> PAGE_SHIFT;
320}
321
322
323/*
324 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
325 * is valid. The argument is a physical page number.
326 *
327 *
328 * On x86, access has to be given to the first megabyte of ram because that area
329 * contains bios code and data regions used by X and dosemu and similar apps.
330 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
331 * mmio resources as well as potential bios/acpi data regions.
332 */
333int devmem_is_allowed(unsigned long pagenr)
334{
335 if (pagenr <= 256)
336 return 1;
337 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
338 return 0;
339 if (!page_is_ram(pagenr))
340 return 1;
341 return 0;
342}
6 343
7void free_init_pages(char *what, unsigned long begin, unsigned long end) 344void free_init_pages(char *what, unsigned long begin, unsigned long end)
8{ 345{
@@ -47,3 +384,10 @@ void free_initmem(void)
47 (unsigned long)(&__init_begin), 384 (unsigned long)(&__init_begin),
48 (unsigned long)(&__init_end)); 385 (unsigned long)(&__init_end));
49} 386}
387
388#ifdef CONFIG_BLK_DEV_INITRD
389void free_initrd_mem(unsigned long start, unsigned long end)
390{
391 free_init_pages("initrd memory", start, end);
392}
393#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 47df0e1bbeb9..db81e9a8556b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,6 +49,7 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/init.h>
52 53
53unsigned long max_low_pfn_mapped; 54unsigned long max_low_pfn_mapped;
54unsigned long max_pfn_mapped; 55unsigned long max_pfn_mapped;
@@ -58,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn;
58 59
59static noinline int do_test_wp_bit(void); 60static noinline int do_test_wp_bit(void);
60 61
61 62bool __read_mostly __vmalloc_start_set = false;
62static unsigned long __initdata table_start;
63static unsigned long __meminitdata table_end;
64static unsigned long __meminitdata table_top;
65
66static int __initdata after_init_bootmem;
67 63
68static __init void *alloc_low_page(void) 64static __init void *alloc_low_page(void)
69{ 65{
70 unsigned long pfn = table_end++; 66 unsigned long pfn = e820_table_end++;
71 void *adr; 67 void *adr;
72 68
73 if (pfn >= table_top) 69 if (pfn >= e820_table_top)
74 panic("alloc_low_page: ran out of memory"); 70 panic("alloc_low_page: ran out of memory");
75 71
76 adr = __va(pfn * PAGE_SIZE); 72 adr = __va(pfn * PAGE_SIZE);
@@ -90,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
90 86
91#ifdef CONFIG_X86_PAE 87#ifdef CONFIG_X86_PAE
92 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 88 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
93 if (after_init_bootmem) 89 if (after_bootmem)
94 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 90 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
95 else 91 else
96 pmd_table = (pmd_t *)alloc_low_page(); 92 pmd_table = (pmd_t *)alloc_low_page();
@@ -117,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
117 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { 113 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
118 pte_t *page_table = NULL; 114 pte_t *page_table = NULL;
119 115
120 if (after_init_bootmem) { 116 if (after_bootmem) {
121#ifdef CONFIG_DEBUG_PAGEALLOC 117#ifdef CONFIG_DEBUG_PAGEALLOC
122 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); 118 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
123#endif 119#endif
@@ -168,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
168 if (pmd_idx_kmap_begin != pmd_idx_kmap_end 164 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
169 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin 165 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
170 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end 166 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
171 && ((__pa(pte) >> PAGE_SHIFT) < table_start 167 && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
172 || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { 168 || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
173 pte_t *newpte; 169 pte_t *newpte;
174 int i; 170 int i;
175 171
176 BUG_ON(after_init_bootmem); 172 BUG_ON(after_bootmem);
177 newpte = alloc_low_page(); 173 newpte = alloc_low_page();
178 for (i = 0; i < PTRS_PER_PTE; i++) 174 for (i = 0; i < PTRS_PER_PTE; i++)
179 set_pte(newpte + i, pte[i]); 175 set_pte(newpte + i, pte[i]);
@@ -242,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr)
242 * of max_low_pfn pages, by creating page tables starting from address 238 * of max_low_pfn pages, by creating page tables starting from address
243 * PAGE_OFFSET: 239 * PAGE_OFFSET:
244 */ 240 */
245static void __init kernel_physical_mapping_init(pgd_t *pgd_base, 241unsigned long __init
246 unsigned long start_pfn, 242kernel_physical_mapping_init(unsigned long start,
247 unsigned long end_pfn, 243 unsigned long end,
248 int use_pse) 244 unsigned long page_size_mask)
249{ 245{
246 int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
247 unsigned long start_pfn, end_pfn;
248 pgd_t *pgd_base = swapper_pg_dir;
250 int pgd_idx, pmd_idx, pte_ofs; 249 int pgd_idx, pmd_idx, pte_ofs;
251 unsigned long pfn; 250 unsigned long pfn;
252 pgd_t *pgd; 251 pgd_t *pgd;
@@ -255,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
255 unsigned pages_2m, pages_4k; 254 unsigned pages_2m, pages_4k;
256 int mapping_iter; 255 int mapping_iter;
257 256
257 start_pfn = start >> PAGE_SHIFT;
258 end_pfn = end >> PAGE_SHIFT;
259
258 /* 260 /*
259 * First iteration will setup identity mapping using large/small pages 261 * First iteration will setup identity mapping using large/small pages
260 * based on use_pse, with other attributes same as set by 262 * based on use_pse, with other attributes same as set by
@@ -369,26 +371,6 @@ repeat:
369 mapping_iter = 2; 371 mapping_iter = 2;
370 goto repeat; 372 goto repeat;
371 } 373 }
372}
373
374/*
375 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
376 * is valid. The argument is a physical page number.
377 *
378 *
379 * On x86, access has to be given to the first megabyte of ram because that area
380 * contains bios code and data regions used by X and dosemu and similar apps.
381 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
382 * mmio resources as well as potential bios/acpi data regions.
383 */
384int devmem_is_allowed(unsigned long pagenr)
385{
386 if (pagenr <= 256)
387 return 1;
388 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
389 return 0;
390 if (!page_is_ram(pagenr))
391 return 1;
392 return 0; 374 return 0;
393} 375}
394 376
@@ -545,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base)
545 * be partially populated, and so it avoids stomping on any existing 527 * be partially populated, and so it avoids stomping on any existing
546 * mappings. 528 * mappings.
547 */ 529 */
548static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) 530void __init early_ioremap_page_table_range_init(void)
549{ 531{
532 pgd_t *pgd_base = swapper_pg_dir;
550 unsigned long vaddr, end; 533 unsigned long vaddr, end;
551 534
552 /* 535 /*
@@ -641,7 +624,7 @@ static int __init noexec_setup(char *str)
641} 624}
642early_param("noexec", noexec_setup); 625early_param("noexec", noexec_setup);
643 626
644static void __init set_nx(void) 627void __init set_nx(void)
645{ 628{
646 unsigned int v[4], l, h; 629 unsigned int v[4], l, h;
647 630
@@ -793,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn,
793#ifdef CONFIG_FLATMEM 776#ifdef CONFIG_FLATMEM
794 max_mapnr = num_physpages; 777 max_mapnr = num_physpages;
795#endif 778#endif
779 __vmalloc_start_set = true;
780
796 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 781 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
797 pages_to_mb(max_low_pfn)); 782 pages_to_mb(max_low_pfn));
798 783
@@ -814,176 +799,66 @@ static void __init zone_sizes_init(void)
814 free_area_init_nodes(max_zone_pfns); 799 free_area_init_nodes(max_zone_pfns);
815} 800}
816 801
802static unsigned long __init setup_node_bootmem(int nodeid,
803 unsigned long start_pfn,
804 unsigned long end_pfn,
805 unsigned long bootmap)
806{
807 unsigned long bootmap_size;
808
809 /* don't touch min_low_pfn */
810 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
811 bootmap >> PAGE_SHIFT,
812 start_pfn, end_pfn);
813 printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
814 nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
815 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
816 nodeid, bootmap, bootmap + bootmap_size);
817 free_bootmem_with_active_regions(nodeid, end_pfn);
818 early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
819
820 return bootmap + bootmap_size;
821}
822
817void __init setup_bootmem_allocator(void) 823void __init setup_bootmem_allocator(void)
818{ 824{
819 int i; 825 int nodeid;
820 unsigned long bootmap_size, bootmap; 826 unsigned long bootmap_size, bootmap;
821 /* 827 /*
822 * Initialize the boot-time allocator (with low memory only): 828 * Initialize the boot-time allocator (with low memory only):
823 */ 829 */
824 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; 830 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
825 bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, 831 bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
826 max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
827 PAGE_SIZE); 832 PAGE_SIZE);
828 if (bootmap == -1L) 833 if (bootmap == -1L)
829 panic("Cannot find bootmem map of size %ld\n", bootmap_size); 834 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
830 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); 835 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
831 836
832 /* don't touch min_low_pfn */
833 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
834 min_low_pfn, max_low_pfn);
835 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 837 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
836 max_pfn_mapped<<PAGE_SHIFT); 838 max_pfn_mapped<<PAGE_SHIFT);
837 printk(KERN_INFO " low ram: %08lx - %08lx\n", 839 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
838 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
839 printk(KERN_INFO " bootmap %08lx - %08lx\n",
840 bootmap, bootmap + bootmap_size);
841 for_each_online_node(i)
842 free_bootmem_with_active_regions(i, max_low_pfn);
843 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
844
845 after_init_bootmem = 1;
846}
847
848static void __init find_early_table_space(unsigned long end, int use_pse)
849{
850 unsigned long puds, pmds, ptes, tables, start;
851 840
852 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 841 for_each_online_node(nodeid) {
853 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); 842 unsigned long start_pfn, end_pfn;
854 843
855 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 844#ifdef CONFIG_NEED_MULTIPLE_NODES
856 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); 845 start_pfn = node_start_pfn[nodeid];
857 846 end_pfn = node_end_pfn[nodeid];
858 if (use_pse) { 847 if (start_pfn > max_low_pfn)
859 unsigned long extra; 848 continue;
860 849 if (end_pfn > max_low_pfn)
861 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); 850 end_pfn = max_low_pfn;
862 extra += PMD_SIZE;
863 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
864 } else
865 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
866
867 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
868
869 /* for fixmap */
870 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
871
872 /*
873 * RED-PEN putting page tables only on node 0 could
874 * cause a hotspot and fill up ZONE_DMA. The page tables
875 * need roughly 0.5KB per GB.
876 */
877 start = 0x7000;
878 table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
879 tables, PAGE_SIZE);
880 if (table_start == -1UL)
881 panic("Cannot find space for the kernel page tables");
882
883 table_start >>= PAGE_SHIFT;
884 table_end = table_start;
885 table_top = table_start + (tables>>PAGE_SHIFT);
886
887 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
888 end, table_start << PAGE_SHIFT,
889 (table_start << PAGE_SHIFT) + tables);
890}
891
892unsigned long __init_refok init_memory_mapping(unsigned long start,
893 unsigned long end)
894{
895 pgd_t *pgd_base = swapper_pg_dir;
896 unsigned long start_pfn, end_pfn;
897 unsigned long big_page_start;
898#ifdef CONFIG_DEBUG_PAGEALLOC
899 /*
900 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
901 * This will simplify cpa(), which otherwise needs to support splitting
902 * large pages into small in interrupt context, etc.
903 */
904 int use_pse = 0;
905#else 851#else
906 int use_pse = cpu_has_pse; 852 start_pfn = 0;
853 end_pfn = max_low_pfn;
907#endif 854#endif
908 855 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
909 /* 856 bootmap);
910 * Find space for the kernel direct mapping tables.
911 */
912 if (!after_init_bootmem)
913 find_early_table_space(end, use_pse);
914
915#ifdef CONFIG_X86_PAE
916 set_nx();
917 if (nx_enabled)
918 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
919#endif
920
921 /* Enable PSE if available */
922 if (cpu_has_pse)
923 set_in_cr4(X86_CR4_PSE);
924
925 /* Enable PGE if available */
926 if (cpu_has_pge) {
927 set_in_cr4(X86_CR4_PGE);
928 __supported_pte_mask |= _PAGE_GLOBAL;
929 }
930
931 /*
932 * Don't use a large page for the first 2/4MB of memory
933 * because there are often fixed size MTRRs in there
934 * and overlapping MTRRs into large pages can cause
935 * slowdowns.
936 */
937 big_page_start = PMD_SIZE;
938
939 if (start < big_page_start) {
940 start_pfn = start >> PAGE_SHIFT;
941 end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
942 } else {
943 /* head is not big page alignment ? */
944 start_pfn = start >> PAGE_SHIFT;
945 end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
946 << (PMD_SHIFT - PAGE_SHIFT);
947 } 857 }
948 if (start_pfn < end_pfn)
949 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
950
951 /* big page range */
952 start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
953 << (PMD_SHIFT - PAGE_SHIFT);
954 if (start_pfn < (big_page_start >> PAGE_SHIFT))
955 start_pfn = big_page_start >> PAGE_SHIFT;
956 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
957 if (start_pfn < end_pfn)
958 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
959 use_pse);
960
961 /* tail is not big page alignment ? */
962 start_pfn = end_pfn;
963 if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
964 end_pfn = end >> PAGE_SHIFT;
965 if (start_pfn < end_pfn)
966 kernel_physical_mapping_init(pgd_base, start_pfn,
967 end_pfn, 0);
968 }
969
970 early_ioremap_page_table_range_init(pgd_base);
971 858
972 load_cr3(swapper_pg_dir); 859 after_bootmem = 1;
973
974 __flush_tlb_all();
975
976 if (!after_init_bootmem)
977 reserve_early(table_start << PAGE_SHIFT,
978 table_end << PAGE_SHIFT, "PGTABLE");
979
980 if (!after_init_bootmem)
981 early_memtest(start, end);
982
983 return end >> PAGE_SHIFT;
984} 860}
985 861
986
987/* 862/*
988 * paging_init() sets up the page tables - note that the first 8MB are 863 * paging_init() sets up the page tables - note that the first 8MB are
989 * already mapped by head.S. 864 * already mapped by head.S.
@@ -1217,13 +1092,6 @@ void mark_rodata_ro(void)
1217} 1092}
1218#endif 1093#endif
1219 1094
1220#ifdef CONFIG_BLK_DEV_INITRD
1221void free_initrd_mem(unsigned long start, unsigned long end)
1222{
1223 free_init_pages("initrd memory", start, end);
1224}
1225#endif
1226
1227int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 1095int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1228 int flags) 1096 int flags)
1229{ 1097{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 07f44d491df1..54efa57d1c03 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,7 @@
48#include <asm/kdebug.h> 48#include <asm/kdebug.h>
49#include <asm/numa.h> 49#include <asm/numa.h>
50#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
51#include <asm/init.h>
51 52
52/* 53/*
53 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 54 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata;
61 62
62DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 63DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
63 64
64int direct_gbpages
65#ifdef CONFIG_DIRECT_GBPAGES
66 = 1
67#endif
68;
69
70static int __init parse_direct_gbpages_off(char *arg) 65static int __init parse_direct_gbpages_off(char *arg)
71{ 66{
72 direct_gbpages = 0; 67 direct_gbpages = 0;
@@ -87,12 +82,10 @@ early_param("gbpages", parse_direct_gbpages_on);
87 * around without checking the pgd every time. 82 * around without checking the pgd every time.
88 */ 83 */
89 84
90int after_bootmem;
91
92pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; 85pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
93EXPORT_SYMBOL_GPL(__supported_pte_mask); 86EXPORT_SYMBOL_GPL(__supported_pte_mask);
94 87
95static int do_not_nx __cpuinitdata; 88static int disable_nx __cpuinitdata;
96 89
97/* 90/*
98 * noexec=on|off 91 * noexec=on|off
@@ -107,9 +100,9 @@ static int __init nonx_setup(char *str)
107 return -EINVAL; 100 return -EINVAL;
108 if (!strncmp(str, "on", 2)) { 101 if (!strncmp(str, "on", 2)) {
109 __supported_pte_mask |= _PAGE_NX; 102 __supported_pte_mask |= _PAGE_NX;
110 do_not_nx = 0; 103 disable_nx = 0;
111 } else if (!strncmp(str, "off", 3)) { 104 } else if (!strncmp(str, "off", 3)) {
112 do_not_nx = 1; 105 disable_nx = 1;
113 __supported_pte_mask &= ~_PAGE_NX; 106 __supported_pte_mask &= ~_PAGE_NX;
114 } 107 }
115 return 0; 108 return 0;
@@ -121,7 +114,7 @@ void __cpuinit check_efer(void)
121 unsigned long efer; 114 unsigned long efer;
122 115
123 rdmsrl(MSR_EFER, efer); 116 rdmsrl(MSR_EFER, efer);
124 if (!(efer & EFER_NX) || do_not_nx) 117 if (!(efer & EFER_NX) || disable_nx)
125 __supported_pte_mask &= ~_PAGE_NX; 118 __supported_pte_mask &= ~_PAGE_NX;
126} 119}
127 120
@@ -325,13 +318,9 @@ void __init cleanup_highmap(void)
325 } 318 }
326} 319}
327 320
328static unsigned long __initdata table_start;
329static unsigned long __meminitdata table_end;
330static unsigned long __meminitdata table_top;
331
332static __ref void *alloc_low_page(unsigned long *phys) 321static __ref void *alloc_low_page(unsigned long *phys)
333{ 322{
334 unsigned long pfn = table_end++; 323 unsigned long pfn = e820_table_end++;
335 void *adr; 324 void *adr;
336 325
337 if (after_bootmem) { 326 if (after_bootmem) {
@@ -341,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
341 return adr; 330 return adr;
342 } 331 }
343 332
344 if (pfn >= table_top) 333 if (pfn >= e820_table_top)
345 panic("alloc_low_page: ran out of memory"); 334 panic("alloc_low_page: ran out of memory");
346 335
347 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 336 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -581,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
581 return phys_pud_init(pud, addr, end, page_size_mask); 570 return phys_pud_init(pud, addr, end, page_size_mask);
582} 571}
583 572
584static void __init find_early_table_space(unsigned long end, int use_pse, 573unsigned long __init
585 int use_gbpages) 574kernel_physical_mapping_init(unsigned long start,
586{ 575 unsigned long end,
587 unsigned long puds, pmds, ptes, tables, start; 576 unsigned long page_size_mask)
588
589 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
590 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
591 if (use_gbpages) {
592 unsigned long extra;
593 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
594 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
595 } else
596 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
597 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
598
599 if (use_pse) {
600 unsigned long extra;
601 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
602 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
603 } else
604 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
605 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
606
607 /*
608 * RED-PEN putting page tables only on node 0 could
609 * cause a hotspot and fill up ZONE_DMA. The page tables
610 * need roughly 0.5KB per GB.
611 */
612 start = 0x8000;
613 table_start = find_e820_area(start, end, tables, PAGE_SIZE);
614 if (table_start == -1UL)
615 panic("Cannot find space for the kernel page tables");
616
617 table_start >>= PAGE_SHIFT;
618 table_end = table_start;
619 table_top = table_start + (tables >> PAGE_SHIFT);
620
621 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
622 end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
623}
624
625static void __init init_gbpages(void)
626{
627 if (direct_gbpages && cpu_has_gbpages)
628 printk(KERN_INFO "Using GB pages for direct mapping\n");
629 else
630 direct_gbpages = 0;
631}
632
633static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
634 unsigned long end,
635 unsigned long page_size_mask)
636{ 577{
637 578
638 unsigned long next, last_map_addr = end; 579 unsigned long next, last_map_addr = end;
@@ -669,176 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
669 return last_map_addr; 610 return last_map_addr;
670} 611}
671 612
672struct map_range {
673 unsigned long start;
674 unsigned long end;
675 unsigned page_size_mask;
676};
677
678#define NR_RANGE_MR 5
679
680static int save_mr(struct map_range *mr, int nr_range,
681 unsigned long start_pfn, unsigned long end_pfn,
682 unsigned long page_size_mask)
683{
684
685 if (start_pfn < end_pfn) {
686 if (nr_range >= NR_RANGE_MR)
687 panic("run out of range for init_memory_mapping\n");
688 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
689 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
690 mr[nr_range].page_size_mask = page_size_mask;
691 nr_range++;
692 }
693
694 return nr_range;
695}
696
697/*
698 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
699 * This runs before bootmem is initialized and gets pages directly from
700 * the physical memory. To access them they are temporarily mapped.
701 */
702unsigned long __init_refok init_memory_mapping(unsigned long start,
703 unsigned long end)
704{
705 unsigned long last_map_addr = 0;
706 unsigned long page_size_mask = 0;
707 unsigned long start_pfn, end_pfn;
708 unsigned long pos;
709
710 struct map_range mr[NR_RANGE_MR];
711 int nr_range, i;
712 int use_pse, use_gbpages;
713
714 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
715
716 /*
717 * Find space for the kernel direct mapping tables.
718 *
719 * Later we should allocate these tables in the local node of the
720 * memory mapped. Unfortunately this is done currently before the
721 * nodes are discovered.
722 */
723 if (!after_bootmem)
724 init_gbpages();
725
726#ifdef CONFIG_DEBUG_PAGEALLOC
727 /*
728 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
729 * This will simplify cpa(), which otherwise needs to support splitting
730 * large pages into small in interrupt context, etc.
731 */
732 use_pse = use_gbpages = 0;
733#else
734 use_pse = cpu_has_pse;
735 use_gbpages = direct_gbpages;
736#endif
737
738 if (use_gbpages)
739 page_size_mask |= 1 << PG_LEVEL_1G;
740 if (use_pse)
741 page_size_mask |= 1 << PG_LEVEL_2M;
742
743 memset(mr, 0, sizeof(mr));
744 nr_range = 0;
745
746 /* head if not big page alignment ?*/
747 start_pfn = start >> PAGE_SHIFT;
748 pos = start_pfn << PAGE_SHIFT;
749 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
750 << (PMD_SHIFT - PAGE_SHIFT);
751 if (end_pfn > (end >> PAGE_SHIFT))
752 end_pfn = end >> PAGE_SHIFT;
753 if (start_pfn < end_pfn) {
754 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
755 pos = end_pfn << PAGE_SHIFT;
756 }
757
758 /* big page (2M) range*/
759 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
760 << (PMD_SHIFT - PAGE_SHIFT);
761 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
762 << (PUD_SHIFT - PAGE_SHIFT);
763 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
764 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
765 if (start_pfn < end_pfn) {
766 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
767 page_size_mask & (1<<PG_LEVEL_2M));
768 pos = end_pfn << PAGE_SHIFT;
769 }
770
771 /* big page (1G) range */
772 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
773 << (PUD_SHIFT - PAGE_SHIFT);
774 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
775 if (start_pfn < end_pfn) {
776 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
777 page_size_mask &
778 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
779 pos = end_pfn << PAGE_SHIFT;
780 }
781
782 /* tail is not big page (1G) alignment */
783 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
784 << (PMD_SHIFT - PAGE_SHIFT);
785 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
786 if (start_pfn < end_pfn) {
787 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
788 page_size_mask & (1<<PG_LEVEL_2M));
789 pos = end_pfn << PAGE_SHIFT;
790 }
791
792 /* tail is not big page (2M) alignment */
793 start_pfn = pos>>PAGE_SHIFT;
794 end_pfn = end>>PAGE_SHIFT;
795 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
796
797 /* try to merge same page size and continuous */
798 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
799 unsigned long old_start;
800 if (mr[i].end != mr[i+1].start ||
801 mr[i].page_size_mask != mr[i+1].page_size_mask)
802 continue;
803 /* move it */
804 old_start = mr[i].start;
805 memmove(&mr[i], &mr[i+1],
806 (nr_range - 1 - i) * sizeof (struct map_range));
807 mr[i--].start = old_start;
808 nr_range--;
809 }
810
811 for (i = 0; i < nr_range; i++)
812 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
813 mr[i].start, mr[i].end,
814 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
815 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
816
817 if (!after_bootmem)
818 find_early_table_space(end, use_pse, use_gbpages);
819
820 for (i = 0; i < nr_range; i++)
821 last_map_addr = kernel_physical_mapping_init(
822 mr[i].start, mr[i].end,
823 mr[i].page_size_mask);
824
825 if (!after_bootmem)
826 mmu_cr4_features = read_cr4();
827 __flush_tlb_all();
828
829 if (!after_bootmem && table_end > table_start)
830 reserve_early(table_start << PAGE_SHIFT,
831 table_end << PAGE_SHIFT, "PGTABLE");
832
833 printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
834 last_map_addr, end);
835
836 if (!after_bootmem)
837 early_memtest(start, end);
838
839 return last_map_addr >> PAGE_SHIFT;
840}
841
842#ifndef CONFIG_NUMA 613#ifndef CONFIG_NUMA
843void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) 614void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
844{ 615{
@@ -910,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
910 681
911#endif /* CONFIG_MEMORY_HOTPLUG */ 682#endif /* CONFIG_MEMORY_HOTPLUG */
912 683
913/*
914 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
915 * is valid. The argument is a physical page number.
916 *
917 *
918 * On x86, access has to be given to the first megabyte of ram because that area
919 * contains bios code and data regions used by X and dosemu and similar apps.
920 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
921 * mmio resources as well as potential bios/acpi data regions.
922 */
923int devmem_is_allowed(unsigned long pagenr)
924{
925 if (pagenr <= 256)
926 return 1;
927 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
928 return 0;
929 if (!page_is_ram(pagenr))
930 return 1;
931 return 0;
932}
933
934
935static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, 684static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
936 kcore_modules, kcore_vsyscall; 685 kcore_modules, kcore_vsyscall;
937 686
@@ -1019,13 +768,6 @@ void mark_rodata_ro(void)
1019 768
1020#endif 769#endif
1021 770
1022#ifdef CONFIG_BLK_DEV_INITRD
1023void free_initrd_mem(unsigned long start, unsigned long end)
1024{
1025 free_init_pages("initrd memory", start, end);
1026}
1027#endif
1028
1029int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 771int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1030 int flags) 772 int flags)
1031{ 773{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 433f7bd4648a..aca924a30ee6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -38,8 +38,7 @@ unsigned long __phys_addr(unsigned long x)
38 } else { 38 } else {
39 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 39 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
40 x -= PAGE_OFFSET; 40 x -= PAGE_OFFSET;
41 VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : 41 VIRTUAL_BUG_ON(!phys_addr_valid(x));
42 !phys_addr_valid(x));
43 } 42 }
44 return x; 43 return x;
45} 44}
@@ -56,10 +55,8 @@ bool __virt_addr_valid(unsigned long x)
56 if (x < PAGE_OFFSET) 55 if (x < PAGE_OFFSET)
57 return false; 56 return false;
58 x -= PAGE_OFFSET; 57 x -= PAGE_OFFSET;
59 if (system_state == SYSTEM_BOOTING ? 58 if (!phys_addr_valid(x))
60 x > MAXMEM : !phys_addr_valid(x)) {
61 return false; 59 return false;
62 }
63 } 60 }
64 61
65 return pfn_valid(x >> PAGE_SHIFT); 62 return pfn_valid(x >> PAGE_SHIFT);
@@ -76,10 +73,9 @@ static inline int phys_addr_valid(unsigned long addr)
76#ifdef CONFIG_DEBUG_VIRTUAL 73#ifdef CONFIG_DEBUG_VIRTUAL
77unsigned long __phys_addr(unsigned long x) 74unsigned long __phys_addr(unsigned long x)
78{ 75{
79 /* VMALLOC_* aren't constants; not available at the boot time */ 76 /* VMALLOC_* aren't constants */
80 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 77 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
81 VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && 78 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
82 is_vmalloc_addr((void *) x));
83 return x - PAGE_OFFSET; 79 return x - PAGE_OFFSET;
84} 80}
85EXPORT_SYMBOL(__phys_addr); 81EXPORT_SYMBOL(__phys_addr);
@@ -89,7 +85,9 @@ bool __virt_addr_valid(unsigned long x)
89{ 85{
90 if (x < PAGE_OFFSET) 86 if (x < PAGE_OFFSET)
91 return false; 87 return false;
92 if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) 88 if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
89 return false;
90 if (x >= FIXADDR_START)
93 return false; 91 return false;
94 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); 92 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
95} 93}
@@ -508,13 +506,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
508 return &bm_pte[pte_index(addr)]; 506 return &bm_pte[pte_index(addr)];
509} 507}
510 508
509static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
510
511void __init early_ioremap_init(void) 511void __init early_ioremap_init(void)
512{ 512{
513 pmd_t *pmd; 513 pmd_t *pmd;
514 int i;
514 515
515 if (early_ioremap_debug) 516 if (early_ioremap_debug)
516 printk(KERN_INFO "early_ioremap_init()\n"); 517 printk(KERN_INFO "early_ioremap_init()\n");
517 518
519 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
520 slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
521
518 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 522 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
519 memset(bm_pte, 0, sizeof(bm_pte)); 523 memset(bm_pte, 0, sizeof(bm_pte));
520 pmd_populate_kernel(&init_mm, pmd, bm_pte); 524 pmd_populate_kernel(&init_mm, pmd, bm_pte);
@@ -581,6 +585,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
581 585
582static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; 586static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
583static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; 587static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
588
584static int __init check_early_ioremap_leak(void) 589static int __init check_early_ioremap_leak(void)
585{ 590{
586 int count = 0; 591 int count = 0;
@@ -602,7 +607,8 @@ static int __init check_early_ioremap_leak(void)
602} 607}
603late_initcall(check_early_ioremap_leak); 608late_initcall(check_early_ioremap_leak);
604 609
605static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) 610static void __init __iomem *
611__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
606{ 612{
607 unsigned long offset, last_addr; 613 unsigned long offset, last_addr;
608 unsigned int nrpages; 614 unsigned int nrpages;
@@ -668,9 +674,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo
668 --nrpages; 674 --nrpages;
669 } 675 }
670 if (early_ioremap_debug) 676 if (early_ioremap_debug)
671 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); 677 printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
672 678
673 prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); 679 prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
674 return prev_map[slot]; 680 return prev_map[slot];
675} 681}
676 682
@@ -738,8 +744,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
738 } 744 }
739 prev_map[slot] = NULL; 745 prev_map[slot] = NULL;
740} 746}
741
742void __this_fixmap_does_not_exist(void)
743{
744 WARN_ON(1);
745}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 93d82038af4b..6a518dd08a36 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,11 +32,14 @@ struct kmmio_fault_page {
32 struct list_head list; 32 struct list_head list;
33 struct kmmio_fault_page *release_next; 33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */ 34 unsigned long page; /* location of the fault page */
35 bool old_presence; /* page presence prior to arming */
36 bool armed;
35 37
36 /* 38 /*
37 * Number of times this page has been registered as a part 39 * Number of times this page has been registered as a part
38 * of a probe. If zero, page is disarmed and this may be freed. 40 * of a probe. If zero, page is disarmed and this may be freed.
39 * Used only by writers (RCU). 41 * Used only by writers (RCU) and post_kmmio_handler().
42 * Protected by kmmio_lock, when linked into kmmio_page_table.
40 */ 43 */
41 int count; 44 int count;
42}; 45};
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
105 return NULL; 108 return NULL;
106} 109}
107 110
108static void set_page_present(unsigned long addr, bool present, 111static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
109 unsigned int *pglevel) 112{
113 pmdval_t v = pmd_val(*pmd);
114 *old = !!(v & _PAGE_PRESENT);
115 v &= ~_PAGE_PRESENT;
116 if (present)
117 v |= _PAGE_PRESENT;
118 set_pmd(pmd, __pmd(v));
119}
120
121static void set_pte_presence(pte_t *pte, bool present, bool *old)
122{
123 pteval_t v = pte_val(*pte);
124 *old = !!(v & _PAGE_PRESENT);
125 v &= ~_PAGE_PRESENT;
126 if (present)
127 v |= _PAGE_PRESENT;
128 set_pte_atomic(pte, __pte(v));
129}
130
131static int set_page_presence(unsigned long addr, bool present, bool *old)
110{ 132{
111 pteval_t pteval;
112 pmdval_t pmdval;
113 unsigned int level; 133 unsigned int level;
114 pmd_t *pmd;
115 pte_t *pte = lookup_address(addr, &level); 134 pte_t *pte = lookup_address(addr, &level);
116 135
117 if (!pte) { 136 if (!pte) {
118 pr_err("kmmio: no pte for page 0x%08lx\n", addr); 137 pr_err("kmmio: no pte for page 0x%08lx\n", addr);
119 return; 138 return -1;
120 } 139 }
121 140
122 if (pglevel)
123 *pglevel = level;
124
125 switch (level) { 141 switch (level) {
126 case PG_LEVEL_2M: 142 case PG_LEVEL_2M:
127 pmd = (pmd_t *)pte; 143 set_pmd_presence((pmd_t *)pte, present, old);
128 pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
129 if (present)
130 pmdval |= _PAGE_PRESENT;
131 set_pmd(pmd, __pmd(pmdval));
132 break; 144 break;
133
134 case PG_LEVEL_4K: 145 case PG_LEVEL_4K:
135 pteval = pte_val(*pte) & ~_PAGE_PRESENT; 146 set_pte_presence(pte, present, old);
136 if (present)
137 pteval |= _PAGE_PRESENT;
138 set_pte_atomic(pte, __pte(pteval));
139 break; 147 break;
140
141 default: 148 default:
142 pr_err("kmmio: unexpected page level 0x%x.\n", level); 149 pr_err("kmmio: unexpected page level 0x%x.\n", level);
143 return; 150 return -1;
144 } 151 }
145 152
146 __flush_tlb_one(addr); 153 __flush_tlb_one(addr);
154 return 0;
147} 155}
148 156
149/** Mark the given page as not present. Access to it will trigger a fault. */ 157/*
150static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) 158 * Mark the given page as not present. Access to it will trigger a fault.
159 *
160 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
161 * protection is ignored here. RCU read lock is assumed held, so the struct
162 * will not disappear unexpectedly. Furthermore, the caller must guarantee,
163 * that double arming the same virtual address (page) cannot occur.
164 *
165 * Double disarming on the other hand is allowed, and may occur when a fault
166 * and mmiotrace shutdown happen simultaneously.
167 */
168static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
151{ 169{
152 set_page_present(page & PAGE_MASK, false, pglevel); 170 int ret;
171 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
172 if (f->armed) {
173 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
174 f->page, f->count, f->old_presence);
175 }
176 ret = set_page_presence(f->page, false, &f->old_presence);
177 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
178 f->armed = true;
179 return ret;
153} 180}
154 181
155/** Mark the given page as present. */ 182/** Restore the given page to saved presence state. */
156static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) 183static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
157{ 184{
158 set_page_present(page & PAGE_MASK, true, pglevel); 185 bool tmp;
186 int ret = set_page_presence(f->page, f->old_presence, &tmp);
187 WARN_ONCE(ret < 0,
188 KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
189 f->armed = false;
159} 190}
160 191
161/* 192/*
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
202 233
203 ctx = &get_cpu_var(kmmio_ctx); 234 ctx = &get_cpu_var(kmmio_ctx);
204 if (ctx->active) { 235 if (ctx->active) {
205 disarm_kmmio_fault_page(faultpage->page, NULL);
206 if (addr == ctx->addr) { 236 if (addr == ctx->addr) {
207 /* 237 /*
208 * On SMP we sometimes get recursive probe hits on the 238 * A second fault on the same page means some other
209 * same address. Context is already saved, fall out. 239 * condition needs handling by do_page_fault(), the
240 * page really not being present is the most common.
210 */ 241 */
211 pr_debug("kmmio: duplicate probe hit on CPU %d, for " 242 pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
212 "address 0x%08lx.\n", 243 addr, smp_processor_id());
213 smp_processor_id(), addr); 244
214 ret = 1; 245 if (!faultpage->old_presence)
215 goto no_kmmio_ctx; 246 pr_info("kmmio: unexpected secondary hit for "
216 } 247 "address 0x%08lx on CPU %d.\n", addr,
217 /* 248 smp_processor_id());
218 * Prevent overwriting already in-flight context. 249 } else {
219 * This should not happen, let's hope disarming at least 250 /*
220 * prevents a panic. 251 * Prevent overwriting already in-flight context.
221 */ 252 * This should not happen, let's hope disarming at
222 pr_emerg("kmmio: recursive probe hit on CPU %d, " 253 * least prevents a panic.
254 */
255 pr_emerg("kmmio: recursive probe hit on CPU %d, "
223 "for address 0x%08lx. Ignoring.\n", 256 "for address 0x%08lx. Ignoring.\n",
224 smp_processor_id(), addr); 257 smp_processor_id(), addr);
225 pr_emerg("kmmio: previous hit was at 0x%08lx.\n", 258 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
226 ctx->addr); 259 ctx->addr);
260 disarm_kmmio_fault_page(faultpage);
261 }
227 goto no_kmmio_ctx; 262 goto no_kmmio_ctx;
228 } 263 }
229 ctx->active++; 264 ctx->active++;
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
244 regs->flags &= ~X86_EFLAGS_IF; 279 regs->flags &= ~X86_EFLAGS_IF;
245 280
246 /* Now we set present bit in PTE and single step. */ 281 /* Now we set present bit in PTE and single step. */
247 disarm_kmmio_fault_page(ctx->fpage->page, NULL); 282 disarm_kmmio_fault_page(ctx->fpage);
248 283
249 /* 284 /*
250 * If another cpu accesses the same page while we are stepping, 285 * If another cpu accesses the same page while we are stepping,
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
275 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
276 311
277 if (!ctx->active) { 312 if (!ctx->active) {
278 pr_debug("kmmio: spurious debug trap on CPU %d.\n", 313 pr_warning("kmmio: spurious debug trap on CPU %d.\n",
279 smp_processor_id()); 314 smp_processor_id());
280 goto out; 315 goto out;
281 } 316 }
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
283 if (ctx->probe && ctx->probe->post_handler) 318 if (ctx->probe && ctx->probe->post_handler)
284 ctx->probe->post_handler(ctx->probe, condition, regs); 319 ctx->probe->post_handler(ctx->probe, condition, regs);
285 320
286 arm_kmmio_fault_page(ctx->fpage->page, NULL); 321 /* Prevent racing against release_kmmio_fault_page(). */
322 spin_lock(&kmmio_lock);
323 if (ctx->fpage->count)
324 arm_kmmio_fault_page(ctx->fpage);
325 spin_unlock(&kmmio_lock);
287 326
288 regs->flags &= ~X86_EFLAGS_TF; 327 regs->flags &= ~X86_EFLAGS_TF;
289 regs->flags |= ctx->saved_flags; 328 regs->flags |= ctx->saved_flags;
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
315 f = get_kmmio_fault_page(page); 354 f = get_kmmio_fault_page(page);
316 if (f) { 355 if (f) {
317 if (!f->count) 356 if (!f->count)
318 arm_kmmio_fault_page(f->page, NULL); 357 arm_kmmio_fault_page(f);
319 f->count++; 358 f->count++;
320 return 0; 359 return 0;
321 } 360 }
322 361
323 f = kmalloc(sizeof(*f), GFP_ATOMIC); 362 f = kzalloc(sizeof(*f), GFP_ATOMIC);
324 if (!f) 363 if (!f)
325 return -1; 364 return -1;
326 365
327 f->count = 1; 366 f->count = 1;
328 f->page = page; 367 f->page = page;
329 list_add_rcu(&f->list, kmmio_page_list(f->page));
330 368
331 arm_kmmio_fault_page(f->page, NULL); 369 if (arm_kmmio_fault_page(f)) {
370 kfree(f);
371 return -1;
372 }
373
374 list_add_rcu(&f->list, kmmio_page_list(f->page));
332 375
333 return 0; 376 return 0;
334} 377}
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
347 f->count--; 390 f->count--;
348 BUG_ON(f->count < 0); 391 BUG_ON(f->count < 0);
349 if (!f->count) { 392 if (!f->count) {
350 disarm_kmmio_fault_page(f->page, NULL); 393 disarm_kmmio_fault_page(f);
351 f->release_next = *release_list; 394 f->release_next = *release_list;
352 *release_list = f; 395 *release_list = f;
353 } 396 }
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
408 451
409static void remove_kmmio_fault_pages(struct rcu_head *head) 452static void remove_kmmio_fault_pages(struct rcu_head *head)
410{ 453{
411 struct kmmio_delayed_release *dr = container_of( 454 struct kmmio_delayed_release *dr =
412 head, 455 container_of(head, struct kmmio_delayed_release, rcu);
413 struct kmmio_delayed_release,
414 rcu);
415 struct kmmio_fault_page *p = dr->release_list; 456 struct kmmio_fault_page *p = dr->release_list;
416 struct kmmio_fault_page **prevp = &dr->release_list; 457 struct kmmio_fault_page **prevp = &dr->release_list;
417 unsigned long flags; 458 unsigned long flags;
459
418 spin_lock_irqsave(&kmmio_lock, flags); 460 spin_lock_irqsave(&kmmio_lock, flags);
419 while (p) { 461 while (p) {
420 if (!p->count) 462 if (!p->count) {
421 list_del_rcu(&p->list); 463 list_del_rcu(&p->list);
422 else 464 prevp = &p->release_next;
465 } else {
423 *prevp = p->release_next; 466 *prevp = p->release_next;
424 prevp = &p->release_next; 467 }
425 p = p->release_next; 468 p = p->release_next;
426 } 469 }
427 spin_unlock_irqrestore(&kmmio_lock, flags); 470 spin_unlock_irqrestore(&kmmio_lock, flags);
471
428 /* This is the real RCU destroy call. */ 472 /* This is the real RCU destroy call. */
429 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); 473 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
430} 474}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 0bcd7883d036..605c8be06217 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg)
100{ 100{
101 if (arg) 101 if (arg)
102 memtest_pattern = simple_strtoul(arg, NULL, 0); 102 memtest_pattern = simple_strtoul(arg, NULL, 0);
103 else
104 memtest_pattern = ARRAY_SIZE(patterns);
105
103 return 0; 106 return 0;
104} 107}
105 108
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 451fe95a0352..3daefa04ace5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn,
416 for_each_online_node(nid) 416 for_each_online_node(nid)
417 propagate_e820_map_node(nid); 417 propagate_e820_map_node(nid);
418 418
419 for_each_online_node(nid) 419 for_each_online_node(nid) {
420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
421 NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
422 }
421 423
422 NODE_DATA(0)->bdata = &bootmem_node_data[0];
423 setup_bootmem_allocator(); 424 setup_bootmem_allocator();
424} 425}
425 426
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index ab50a8d7402c..427fd1b56df5 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Written by Pekka Paalanen, 2008 <pq@iki.fi> 2 * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/io.h> 5#include <linux/io.h>
@@ -9,35 +9,74 @@
9 9
10static unsigned long mmio_address; 10static unsigned long mmio_address;
11module_param(mmio_address, ulong, 0); 11module_param(mmio_address, ulong, 0);
12MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); 12MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
13 "(or 8 MB if read_far is non-zero).");
14
15static unsigned long read_far = 0x400100;
16module_param(read_far, ulong, 0);
17MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
18 "(default: 0x400100).");
19
20static unsigned v16(unsigned i)
21{
22 return i * 12 + 7;
23}
24
25static unsigned v32(unsigned i)
26{
27 return i * 212371 + 13;
28}
13 29
14static void do_write_test(void __iomem *p) 30static void do_write_test(void __iomem *p)
15{ 31{
16 unsigned int i; 32 unsigned int i;
33 pr_info(MODULE_NAME ": write test.\n");
17 mmiotrace_printk("Write test.\n"); 34 mmiotrace_printk("Write test.\n");
35
18 for (i = 0; i < 256; i++) 36 for (i = 0; i < 256; i++)
19 iowrite8(i, p + i); 37 iowrite8(i, p + i);
38
20 for (i = 1024; i < (5 * 1024); i += 2) 39 for (i = 1024; i < (5 * 1024); i += 2)
21 iowrite16(i * 12 + 7, p + i); 40 iowrite16(v16(i), p + i);
41
22 for (i = (5 * 1024); i < (16 * 1024); i += 4) 42 for (i = (5 * 1024); i < (16 * 1024); i += 4)
23 iowrite32(i * 212371 + 13, p + i); 43 iowrite32(v32(i), p + i);
24} 44}
25 45
26static void do_read_test(void __iomem *p) 46static void do_read_test(void __iomem *p)
27{ 47{
28 unsigned int i; 48 unsigned int i;
49 unsigned errs[3] = { 0 };
50 pr_info(MODULE_NAME ": read test.\n");
29 mmiotrace_printk("Read test.\n"); 51 mmiotrace_printk("Read test.\n");
52
30 for (i = 0; i < 256; i++) 53 for (i = 0; i < 256; i++)
31 ioread8(p + i); 54 if (ioread8(p + i) != i)
55 ++errs[0];
56
32 for (i = 1024; i < (5 * 1024); i += 2) 57 for (i = 1024; i < (5 * 1024); i += 2)
33 ioread16(p + i); 58 if (ioread16(p + i) != v16(i))
59 ++errs[1];
60
34 for (i = (5 * 1024); i < (16 * 1024); i += 4) 61 for (i = (5 * 1024); i < (16 * 1024); i += 4)
35 ioread32(p + i); 62 if (ioread32(p + i) != v32(i))
63 ++errs[2];
64
65 mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
66 errs[0], errs[1], errs[2]);
36} 67}
37 68
38static void do_test(void) 69static void do_read_far_test(void __iomem *p)
39{ 70{
40 void __iomem *p = ioremap_nocache(mmio_address, 0x4000); 71 pr_info(MODULE_NAME ": read far test.\n");
72 mmiotrace_printk("Read far test.\n");
73
74 ioread32(p + read_far);
75}
76
77static void do_test(unsigned long size)
78{
79 void __iomem *p = ioremap_nocache(mmio_address, size);
41 if (!p) { 80 if (!p) {
42 pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); 81 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
43 return; 82 return;
@@ -45,11 +84,15 @@ static void do_test(void)
45 mmiotrace_printk("ioremap returned %p.\n", p); 84 mmiotrace_printk("ioremap returned %p.\n", p);
46 do_write_test(p); 85 do_write_test(p);
47 do_read_test(p); 86 do_read_test(p);
87 if (read_far && read_far < size - 4)
88 do_read_far_test(p);
48 iounmap(p); 89 iounmap(p);
49} 90}
50 91
51static int __init init(void) 92static int __init init(void)
52{ 93{
94 unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
95
53 if (mmio_address == 0) { 96 if (mmio_address == 0) {
54 pr_err(MODULE_NAME ": you have to use the module argument " 97 pr_err(MODULE_NAME ": you have to use the module argument "
55 "mmio_address.\n"); 98 "mmio_address.\n");
@@ -58,10 +101,11 @@ static int __init init(void)
58 return -ENXIO; 101 return -ENXIO;
59 } 102 }
60 103
61 pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " 104 pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
62 "in PCI address space, and writing " 105 "address space, and writing 16 kB of rubbish in there.\n",
63 "rubbish in there.\n", mmio_address); 106 size >> 10, mmio_address);
64 do_test(); 107 do_test(size);
108 pr_info(MODULE_NAME ": All done.\n");
65 return 0; 109 return 0;
66} 110}
67 111