author | Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> | 2009-04-07 16:34:16 -0400
committer | Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> | 2009-04-07 16:34:16 -0400
commit | 38f4b8c0da01ae7cd9b93386842ce272d6fde9ab (patch)
tree | 3c8c52201aac038094bfea7efdd0984a8f62045e /arch/x86/mm
parent | a811454027352c762e0d5bba1b1d8f7d26bf96ae (diff)
parent | 8e2c4f2844c0e8dcdfe312e5f2204854ca8532c6 (diff)
Merge commit 'origin/master' into for-linus/xen/master
* commit 'origin/master': (4825 commits)
Fix build errors due to CONFIG_BRANCH_TRACER=y
parport: Use the PCI IRQ if offered
tty: jsm cleanups
Adjust path to gpio headers
KGDB_SERIAL_CONSOLE check for module
Change KCONFIG name
tty: Blackin CTS/RTS
Change hardware flow control from poll to interrupt driven
Add support for the MAX3100 SPI UART.
lanana: assign a device name and numbering for MAX3100
serqt: initial clean up pass for tty side
tty: Use the generic RS485 ioctl on CRIS
tty: Correct inline types for tty_driver_kref_get()
splice: fix deadlock in splicing to file
nilfs2: support nanosecond timestamp
nilfs2: introduce secondary super block
nilfs2: simplify handling of active state of segments
nilfs2: mark minor flag for checkpoint created by internal operation
nilfs2: clean up sketch file
nilfs2: super block operations fix endian bug
...
Conflicts:
arch/x86/include/asm/thread_info.h
arch/x86/lguest/boot.c
drivers/xen/manage.c
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 2
-rw-r--r-- | arch/x86/mm/highmem_32.c | 69
-rw-r--r-- | arch/x86/mm/init.c | 344
-rw-r--r-- | arch/x86/mm/init_32.c | 291
-rw-r--r-- | arch/x86/mm/init_64.c | 317
-rw-r--r-- | arch/x86/mm/iomap_32.c | 30
-rw-r--r-- | arch/x86/mm/ioremap.c | 52
-rw-r--r-- | arch/x86/mm/kmmio.c | 2
-rw-r--r-- | arch/x86/mm/memtest.c | 3
-rw-r--r-- | arch/x86/mm/mmio-mod.c | 19
-rw-r--r-- | arch/x86/mm/numa.c | 67
-rw-r--r-- | arch/x86/mm/numa_32.c | 5
-rw-r--r-- | arch/x86/mm/numa_64.c | 111
-rw-r--r-- | arch/x86/mm/pageattr.c | 147
-rw-r--r-- | arch/x86/mm/pat.c | 5
-rw-r--r-- | arch/x86/mm/pgtable_32.c | 2
-rw-r--r-- | arch/x86/mm/srat_64.c | 30
-rw-r--r-- | arch/x86/mm/tlb.c | 5
18 files changed, 776 insertions, 725 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 08537747cb58..fdd30d08ab52 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
14 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | 14 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o |
15 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 15 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
16 | 16 | ||
17 | obj-$(CONFIG_NUMA) += numa_$(BITS).o | 17 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o |
18 | obj-$(CONFIG_K8_NUMA) += k8topology_64.o | 18 | obj-$(CONFIG_K8_NUMA) += k8topology_64.o |
19 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 19 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o |
20 | 20 | ||
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index e81dfa408157..58f621e81919 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -19,49 +19,6 @@ void kunmap(struct page *page)
19 | kunmap_high(page); | 19 | kunmap_high(page); |
20 | } | 20 | } |
21 | 21 | ||
22 | static void debug_kmap_atomic_prot(enum km_type type) | ||
23 | { | ||
24 | #ifdef CONFIG_DEBUG_HIGHMEM | ||
25 | static unsigned warn_count = 10; | ||
26 | |||
27 | if (unlikely(warn_count == 0)) | ||
28 | return; | ||
29 | |||
30 | if (unlikely(in_interrupt())) { | ||
31 | if (in_irq()) { | ||
32 | if (type != KM_IRQ0 && type != KM_IRQ1 && | ||
33 | type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && | ||
34 | type != KM_BOUNCE_READ) { | ||
35 | WARN_ON(1); | ||
36 | warn_count--; | ||
37 | } | ||
38 | } else if (!irqs_disabled()) { /* softirq */ | ||
39 | if (type != KM_IRQ0 && type != KM_IRQ1 && | ||
40 | type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && | ||
41 | type != KM_SKB_SUNRPC_DATA && | ||
42 | type != KM_SKB_DATA_SOFTIRQ && | ||
43 | type != KM_BOUNCE_READ) { | ||
44 | WARN_ON(1); | ||
45 | warn_count--; | ||
46 | } | ||
47 | } | ||
48 | } | ||
49 | |||
50 | if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || | ||
51 | type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { | ||
52 | if (!irqs_disabled()) { | ||
53 | WARN_ON(1); | ||
54 | warn_count--; | ||
55 | } | ||
56 | } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { | ||
57 | if (irq_count() == 0 && !irqs_disabled()) { | ||
58 | WARN_ON(1); | ||
59 | warn_count--; | ||
60 | } | ||
61 | } | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | 22 | /* |
66 | * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because | 23 | * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because |
67 | * no global lock is needed and because the kmap code must perform a global TLB | 24 | * no global lock is needed and because the kmap code must perform a global TLB |
@@ -81,7 +38,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
81 | if (!PageHighMem(page)) | 38 | if (!PageHighMem(page)) |
82 | return page_address(page); | 39 | return page_address(page); |
83 | 40 | ||
84 | debug_kmap_atomic_prot(type); | 41 | debug_kmap_atomic(type); |
85 | 42 | ||
86 | idx = type + KM_TYPE_NR*smp_processor_id(); | 43 | idx = type + KM_TYPE_NR*smp_processor_id(); |
87 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | 44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
@@ -119,22 +76,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
119 | pagefault_enable(); | 76 | pagefault_enable(); |
120 | } | 77 | } |
121 | 78 | ||
122 | /* This is the same as kmap_atomic() but can map memory that doesn't | 79 | /* |
80 | * This is the same as kmap_atomic() but can map memory that doesn't | ||
123 | * have a struct page associated with it. | 81 | * have a struct page associated with it. |
124 | */ | 82 | */ |
125 | void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) | 83 | void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) |
126 | { | 84 | { |
127 | enum fixed_addresses idx; | 85 | return kmap_atomic_prot_pfn(pfn, type, kmap_prot); |
128 | unsigned long vaddr; | ||
129 | |||
130 | pagefault_disable(); | ||
131 | |||
132 | idx = type + KM_TYPE_NR*smp_processor_id(); | ||
133 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | ||
134 | set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); | ||
135 | arch_flush_lazy_mmu_mode(); | ||
136 | |||
137 | return (void*) vaddr; | ||
138 | } | 86 | } |
139 | EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ | 87 | EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ |
140 | 88 | ||
@@ -156,7 +104,6 @@ EXPORT_SYMBOL(kunmap);
156 | EXPORT_SYMBOL(kmap_atomic); | 104 | EXPORT_SYMBOL(kmap_atomic); |
157 | EXPORT_SYMBOL(kunmap_atomic); | 105 | EXPORT_SYMBOL(kunmap_atomic); |
158 | 106 | ||
159 | #ifdef CONFIG_NUMA | ||
160 | void __init set_highmem_pages_init(void) | 107 | void __init set_highmem_pages_init(void) |
161 | { | 108 | { |
162 | struct zone *zone; | 109 | struct zone *zone; |
@@ -180,11 +127,3 @@ void __init set_highmem_pages_init(void)
180 | } | 127 | } |
181 | totalram_pages += totalhigh_pages; | 128 | totalram_pages += totalhigh_pages; |
182 | } | 129 | } |
183 | #else | ||
184 | void __init set_highmem_pages_init(void) | ||
185 | { | ||
186 | add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); | ||
187 | |||
188 | totalram_pages += totalhigh_pages; | ||
189 | } | ||
190 | #endif /* CONFIG_NUMA */ | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ce6a722587d8..fd3da1dda1c9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,8 +1,345 @@
1 | #include <linux/ioport.h> | ||
1 | #include <linux/swap.h> | 2 | #include <linux/swap.h> |
3 | |||
2 | #include <asm/cacheflush.h> | 4 | #include <asm/cacheflush.h> |
5 | #include <asm/e820.h> | ||
6 | #include <asm/init.h> | ||
3 | #include <asm/page.h> | 7 | #include <asm/page.h> |
8 | #include <asm/page_types.h> | ||
4 | #include <asm/sections.h> | 9 | #include <asm/sections.h> |
5 | #include <asm/system.h> | 10 | #include <asm/system.h> |
11 | #include <asm/tlbflush.h> | ||
12 | |||
13 | unsigned long __initdata e820_table_start; | ||
14 | unsigned long __meminitdata e820_table_end; | ||
15 | unsigned long __meminitdata e820_table_top; | ||
16 | |||
17 | int after_bootmem; | ||
18 | |||
19 | int direct_gbpages | ||
20 | #ifdef CONFIG_DIRECT_GBPAGES | ||
21 | = 1 | ||
22 | #endif | ||
23 | ; | ||
24 | |||
25 | static void __init find_early_table_space(unsigned long end, int use_pse, | ||
26 | int use_gbpages) | ||
27 | { | ||
28 | unsigned long puds, pmds, ptes, tables, start; | ||
29 | |||
30 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | ||
31 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | ||
32 | |||
33 | if (use_gbpages) { | ||
34 | unsigned long extra; | ||
35 | |||
36 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); | ||
37 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; | ||
38 | } else | ||
39 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | ||
40 | |||
41 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | ||
42 | |||
43 | if (use_pse) { | ||
44 | unsigned long extra; | ||
45 | |||
46 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | ||
47 | #ifdef CONFIG_X86_32 | ||
48 | extra += PMD_SIZE; | ||
49 | #endif | ||
50 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
51 | } else | ||
52 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
53 | |||
54 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
55 | |||
56 | #ifdef CONFIG_X86_32 | ||
57 | /* for fixmap */ | ||
58 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | ||
59 | #endif | ||
60 | |||
61 | /* | ||
62 | * RED-PEN putting page tables only on node 0 could | ||
63 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
64 | * need roughly 0.5KB per GB. | ||
65 | */ | ||
66 | #ifdef CONFIG_X86_32 | ||
67 | start = 0x7000; | ||
68 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
69 | tables, PAGE_SIZE); | ||
70 | #else /* CONFIG_X86_64 */ | ||
71 | start = 0x8000; | ||
72 | e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
73 | #endif | ||
74 | if (e820_table_start == -1UL) | ||
75 | panic("Cannot find space for the kernel page tables"); | ||
76 | |||
77 | e820_table_start >>= PAGE_SHIFT; | ||
78 | e820_table_end = e820_table_start; | ||
79 | e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); | ||
80 | |||
81 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
82 | end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); | ||
83 | } | ||
84 | |||
85 | struct map_range { | ||
86 | unsigned long start; | ||
87 | unsigned long end; | ||
88 | unsigned page_size_mask; | ||
89 | }; | ||
90 | |||
91 | #ifdef CONFIG_X86_32 | ||
92 | #define NR_RANGE_MR 3 | ||
93 | #else /* CONFIG_X86_64 */ | ||
94 | #define NR_RANGE_MR 5 | ||
95 | #endif | ||
96 | |||
97 | static int __meminit save_mr(struct map_range *mr, int nr_range, | ||
98 | unsigned long start_pfn, unsigned long end_pfn, | ||
99 | unsigned long page_size_mask) | ||
100 | { | ||
101 | if (start_pfn < end_pfn) { | ||
102 | if (nr_range >= NR_RANGE_MR) | ||
103 | panic("run out of range for init_memory_mapping\n"); | ||
104 | mr[nr_range].start = start_pfn<<PAGE_SHIFT; | ||
105 | mr[nr_range].end = end_pfn<<PAGE_SHIFT; | ||
106 | mr[nr_range].page_size_mask = page_size_mask; | ||
107 | nr_range++; | ||
108 | } | ||
109 | |||
110 | return nr_range; | ||
111 | } | ||
112 | |||
113 | #ifdef CONFIG_X86_64 | ||
114 | static void __init init_gbpages(void) | ||
115 | { | ||
116 | if (direct_gbpages && cpu_has_gbpages) | ||
117 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
118 | else | ||
119 | direct_gbpages = 0; | ||
120 | } | ||
121 | #else | ||
122 | static inline void init_gbpages(void) | ||
123 | { | ||
124 | } | ||
125 | #endif | ||
126 | |||
127 | /* | ||
128 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | ||
129 | * This runs before bootmem is initialized and gets pages directly from | ||
130 | * the physical memory. To access them they are temporarily mapped. | ||
131 | */ | ||
132 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
133 | unsigned long end) | ||
134 | { | ||
135 | unsigned long page_size_mask = 0; | ||
136 | unsigned long start_pfn, end_pfn; | ||
137 | unsigned long ret = 0; | ||
138 | unsigned long pos; | ||
139 | |||
140 | struct map_range mr[NR_RANGE_MR]; | ||
141 | int nr_range, i; | ||
142 | int use_pse, use_gbpages; | ||
143 | |||
144 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); | ||
145 | |||
146 | if (!after_bootmem) | ||
147 | init_gbpages(); | ||
148 | |||
149 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
150 | /* | ||
151 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
152 | * This will simplify cpa(), which otherwise needs to support splitting | ||
153 | * large pages into small in interrupt context, etc. | ||
154 | */ | ||
155 | use_pse = use_gbpages = 0; | ||
156 | #else | ||
157 | use_pse = cpu_has_pse; | ||
158 | use_gbpages = direct_gbpages; | ||
159 | #endif | ||
160 | |||
161 | #ifdef CONFIG_X86_32 | ||
162 | #ifdef CONFIG_X86_PAE | ||
163 | set_nx(); | ||
164 | if (nx_enabled) | ||
165 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
166 | #endif | ||
167 | |||
168 | /* Enable PSE if available */ | ||
169 | if (cpu_has_pse) | ||
170 | set_in_cr4(X86_CR4_PSE); | ||
171 | |||
172 | /* Enable PGE if available */ | ||
173 | if (cpu_has_pge) { | ||
174 | set_in_cr4(X86_CR4_PGE); | ||
175 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
176 | } | ||
177 | #endif | ||
178 | |||
179 | if (use_gbpages) | ||
180 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
181 | if (use_pse) | ||
182 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
183 | |||
184 | memset(mr, 0, sizeof(mr)); | ||
185 | nr_range = 0; | ||
186 | |||
187 | /* head if not big page alignment ? */ | ||
188 | start_pfn = start >> PAGE_SHIFT; | ||
189 | pos = start_pfn << PAGE_SHIFT; | ||
190 | #ifdef CONFIG_X86_32 | ||
191 | /* | ||
192 | * Don't use a large page for the first 2/4MB of memory | ||
193 | * because there are often fixed size MTRRs in there | ||
194 | * and overlapping MTRRs into large pages can cause | ||
195 | * slowdowns. | ||
196 | */ | ||
197 | if (pos == 0) | ||
198 | end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); | ||
199 | else | ||
200 | end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
201 | << (PMD_SHIFT - PAGE_SHIFT); | ||
202 | #else /* CONFIG_X86_64 */ | ||
203 | end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) | ||
204 | << (PMD_SHIFT - PAGE_SHIFT); | ||
205 | #endif | ||
206 | if (end_pfn > (end >> PAGE_SHIFT)) | ||
207 | end_pfn = end >> PAGE_SHIFT; | ||
208 | if (start_pfn < end_pfn) { | ||
209 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
210 | pos = end_pfn << PAGE_SHIFT; | ||
211 | } | ||
212 | |||
213 | /* big page (2M) range */ | ||
214 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
215 | << (PMD_SHIFT - PAGE_SHIFT); | ||
216 | #ifdef CONFIG_X86_32 | ||
217 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
218 | #else /* CONFIG_X86_64 */ | ||
219 | end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
220 | << (PUD_SHIFT - PAGE_SHIFT); | ||
221 | if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) | ||
222 | end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); | ||
223 | #endif | ||
224 | |||
225 | if (start_pfn < end_pfn) { | ||
226 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
227 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
228 | pos = end_pfn << PAGE_SHIFT; | ||
229 | } | ||
230 | |||
231 | #ifdef CONFIG_X86_64 | ||
232 | /* big page (1G) range */ | ||
233 | start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
234 | << (PUD_SHIFT - PAGE_SHIFT); | ||
235 | end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
236 | if (start_pfn < end_pfn) { | ||
237 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
238 | page_size_mask & | ||
239 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | ||
240 | pos = end_pfn << PAGE_SHIFT; | ||
241 | } | ||
242 | |||
243 | /* tail is not big page (1G) alignment */ | ||
244 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
245 | << (PMD_SHIFT - PAGE_SHIFT); | ||
246 | end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
247 | if (start_pfn < end_pfn) { | ||
248 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
249 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
250 | pos = end_pfn << PAGE_SHIFT; | ||
251 | } | ||
252 | #endif | ||
253 | |||
254 | /* tail is not big page (2M) alignment */ | ||
255 | start_pfn = pos>>PAGE_SHIFT; | ||
256 | end_pfn = end>>PAGE_SHIFT; | ||
257 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
258 | |||
259 | /* try to merge same page size and continuous */ | ||
260 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | ||
261 | unsigned long old_start; | ||
262 | if (mr[i].end != mr[i+1].start || | ||
263 | mr[i].page_size_mask != mr[i+1].page_size_mask) | ||
264 | continue; | ||
265 | /* move it */ | ||
266 | old_start = mr[i].start; | ||
267 | memmove(&mr[i], &mr[i+1], | ||
268 | (nr_range - 1 - i) * sizeof(struct map_range)); | ||
269 | mr[i--].start = old_start; | ||
270 | nr_range--; | ||
271 | } | ||
272 | |||
273 | for (i = 0; i < nr_range; i++) | ||
274 | printk(KERN_DEBUG " %010lx - %010lx page %s\n", | ||
275 | mr[i].start, mr[i].end, | ||
276 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | ||
277 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | ||
278 | |||
279 | /* | ||
280 | * Find space for the kernel direct mapping tables. | ||
281 | * | ||
282 | * Later we should allocate these tables in the local node of the | ||
283 | * memory mapped. Unfortunately this is done currently before the | ||
284 | * nodes are discovered. | ||
285 | */ | ||
286 | if (!after_bootmem) | ||
287 | find_early_table_space(end, use_pse, use_gbpages); | ||
288 | |||
289 | #ifdef CONFIG_X86_32 | ||
290 | for (i = 0; i < nr_range; i++) | ||
291 | kernel_physical_mapping_init(mr[i].start, mr[i].end, | ||
292 | mr[i].page_size_mask); | ||
293 | ret = end; | ||
294 | #else /* CONFIG_X86_64 */ | ||
295 | for (i = 0; i < nr_range; i++) | ||
296 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | ||
297 | mr[i].page_size_mask); | ||
298 | #endif | ||
299 | |||
300 | #ifdef CONFIG_X86_32 | ||
301 | early_ioremap_page_table_range_init(); | ||
302 | |||
303 | load_cr3(swapper_pg_dir); | ||
304 | #endif | ||
305 | |||
306 | #ifdef CONFIG_X86_64 | ||
307 | if (!after_bootmem) | ||
308 | mmu_cr4_features = read_cr4(); | ||
309 | #endif | ||
310 | __flush_tlb_all(); | ||
311 | |||
312 | if (!after_bootmem && e820_table_end > e820_table_start) | ||
313 | reserve_early(e820_table_start << PAGE_SHIFT, | ||
314 | e820_table_end << PAGE_SHIFT, "PGTABLE"); | ||
315 | |||
316 | if (!after_bootmem) | ||
317 | early_memtest(start, end); | ||
318 | |||
319 | return ret >> PAGE_SHIFT; | ||
320 | } | ||
321 | |||
322 | |||
323 | /* | ||
324 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
325 | * is valid. The argument is a physical page number. | ||
326 | * | ||
327 | * | ||
328 | * On x86, access has to be given to the first megabyte of ram because that area | ||
329 | * contains bios code and data regions used by X and dosemu and similar apps. | ||
330 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | ||
331 | * mmio resources as well as potential bios/acpi data regions. | ||
332 | */ | ||
333 | int devmem_is_allowed(unsigned long pagenr) | ||
334 | { | ||
335 | if (pagenr <= 256) | ||
336 | return 1; | ||
337 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
338 | return 0; | ||
339 | if (!page_is_ram(pagenr)) | ||
340 | return 1; | ||
341 | return 0; | ||
342 | } | ||
6 | 343 | ||
7 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 344 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
8 | { | 345 | { |
@@ -47,3 +384,10 @@ void free_initmem(void)
47 | (unsigned long)(&__init_begin), | 384 | (unsigned long)(&__init_begin), |
48 | (unsigned long)(&__init_end)); | 385 | (unsigned long)(&__init_end)); |
49 | } | 386 | } |
387 | |||
388 | #ifdef CONFIG_BLK_DEV_INITRD | ||
389 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
390 | { | ||
391 | free_init_pages("initrd memory", start, end); | ||
392 | } | ||
393 | #endif | ||
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 47df0e1bbeb9..749559ed80f5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,6 +49,7 @@
49 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
50 | #include <asm/setup.h> | 50 | #include <asm/setup.h> |
51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
52 | #include <asm/init.h> | ||
52 | 53 | ||
53 | unsigned long max_low_pfn_mapped; | 54 | unsigned long max_low_pfn_mapped; |
54 | unsigned long max_pfn_mapped; | 55 | unsigned long max_pfn_mapped; |
@@ -58,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn;
58 | 59 | ||
59 | static noinline int do_test_wp_bit(void); | 60 | static noinline int do_test_wp_bit(void); |
60 | 61 | ||
61 | 62 | bool __read_mostly __vmalloc_start_set = false; | |
62 | static unsigned long __initdata table_start; | ||
63 | static unsigned long __meminitdata table_end; | ||
64 | static unsigned long __meminitdata table_top; | ||
65 | |||
66 | static int __initdata after_init_bootmem; | ||
67 | 63 | ||
68 | static __init void *alloc_low_page(void) | 64 | static __init void *alloc_low_page(void) |
69 | { | 65 | { |
70 | unsigned long pfn = table_end++; | 66 | unsigned long pfn = e820_table_end++; |
71 | void *adr; | 67 | void *adr; |
72 | 68 | ||
73 | if (pfn >= table_top) | 69 | if (pfn >= e820_table_top) |
74 | panic("alloc_low_page: ran out of memory"); | 70 | panic("alloc_low_page: ran out of memory"); |
75 | 71 | ||
76 | adr = __va(pfn * PAGE_SIZE); | 72 | adr = __va(pfn * PAGE_SIZE); |
@@ -90,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
90 | 86 | ||
91 | #ifdef CONFIG_X86_PAE | 87 | #ifdef CONFIG_X86_PAE |
92 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 88 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
93 | if (after_init_bootmem) | 89 | if (after_bootmem) |
94 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 90 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
95 | else | 91 | else |
96 | pmd_table = (pmd_t *)alloc_low_page(); | 92 | pmd_table = (pmd_t *)alloc_low_page(); |
@@ -117,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
117 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { | 113 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
118 | pte_t *page_table = NULL; | 114 | pte_t *page_table = NULL; |
119 | 115 | ||
120 | if (after_init_bootmem) { | 116 | if (after_bootmem) { |
121 | #ifdef CONFIG_DEBUG_PAGEALLOC | 117 | #ifdef CONFIG_DEBUG_PAGEALLOC |
122 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 118 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
123 | #endif | 119 | #endif |
@@ -168,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
168 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end | 164 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end |
169 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | 165 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin |
170 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | 166 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end |
171 | && ((__pa(pte) >> PAGE_SHIFT) < table_start | 167 | && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start |
172 | || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { | 168 | || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { |
173 | pte_t *newpte; | 169 | pte_t *newpte; |
174 | int i; | 170 | int i; |
175 | 171 | ||
176 | BUG_ON(after_init_bootmem); | 172 | BUG_ON(after_bootmem); |
177 | newpte = alloc_low_page(); | 173 | newpte = alloc_low_page(); |
178 | for (i = 0; i < PTRS_PER_PTE; i++) | 174 | for (i = 0; i < PTRS_PER_PTE; i++) |
179 | set_pte(newpte + i, pte[i]); | 175 | set_pte(newpte + i, pte[i]); |
@@ -242,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr)
242 | * of max_low_pfn pages, by creating page tables starting from address | 238 | * of max_low_pfn pages, by creating page tables starting from address |
243 | * PAGE_OFFSET: | 239 | * PAGE_OFFSET: |
244 | */ | 240 | */ |
245 | static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | 241 | unsigned long __init |
246 | unsigned long start_pfn, | 242 | kernel_physical_mapping_init(unsigned long start, |
247 | unsigned long end_pfn, | 243 | unsigned long end, |
248 | int use_pse) | 244 | unsigned long page_size_mask) |
249 | { | 245 | { |
246 | int use_pse = page_size_mask == (1<<PG_LEVEL_2M); | ||
247 | unsigned long start_pfn, end_pfn; | ||
248 | pgd_t *pgd_base = swapper_pg_dir; | ||
250 | int pgd_idx, pmd_idx, pte_ofs; | 249 | int pgd_idx, pmd_idx, pte_ofs; |
251 | unsigned long pfn; | 250 | unsigned long pfn; |
252 | pgd_t *pgd; | 251 | pgd_t *pgd; |
@@ -255,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
255 | unsigned pages_2m, pages_4k; | 254 | unsigned pages_2m, pages_4k; |
256 | int mapping_iter; | 255 | int mapping_iter; |
257 | 256 | ||
257 | start_pfn = start >> PAGE_SHIFT; | ||
258 | end_pfn = end >> PAGE_SHIFT; | ||
259 | |||
258 | /* | 260 | /* |
259 | * First iteration will setup identity mapping using large/small pages | 261 | * First iteration will setup identity mapping using large/small pages |
260 | * based on use_pse, with other attributes same as set by | 262 | * based on use_pse, with other attributes same as set by |
@@ -369,26 +371,6 @@ repeat:
369 | mapping_iter = 2; | 371 | mapping_iter = 2; |
370 | goto repeat; | 372 | goto repeat; |
371 | } | 373 | } |
372 | } | ||
373 | |||
374 | /* | ||
375 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
376 | * is valid. The argument is a physical page number. | ||
377 | * | ||
378 | * | ||
379 | * On x86, access has to be given to the first megabyte of ram because that area | ||
380 | * contains bios code and data regions used by X and dosemu and similar apps. | ||
381 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | ||
382 | * mmio resources as well as potential bios/acpi data regions. | ||
383 | */ | ||
384 | int devmem_is_allowed(unsigned long pagenr) | ||
385 | { | ||
386 | if (pagenr <= 256) | ||
387 | return 1; | ||
388 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
389 | return 0; | ||
390 | if (!page_is_ram(pagenr)) | ||
391 | return 1; | ||
392 | return 0; | 374 | return 0; |
393 | } | 375 | } |
394 | 376 | ||
@@ -545,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base)
545 | * be partially populated, and so it avoids stomping on any existing | 527 | * be partially populated, and so it avoids stomping on any existing |
546 | * mappings. | 528 | * mappings. |
547 | */ | 529 | */ |
548 | static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) | 530 | void __init early_ioremap_page_table_range_init(void) |
549 | { | 531 | { |
532 | pgd_t *pgd_base = swapper_pg_dir; | ||
550 | unsigned long vaddr, end; | 533 | unsigned long vaddr, end; |
551 | 534 | ||
552 | /* | 535 | /* |
@@ -641,7 +624,7 @@ static int __init noexec_setup(char *str)
641 | } | 624 | } |
642 | early_param("noexec", noexec_setup); | 625 | early_param("noexec", noexec_setup); |
643 | 626 | ||
644 | static void __init set_nx(void) | 627 | void __init set_nx(void) |
645 | { | 628 | { |
646 | unsigned int v[4], l, h; | 629 | unsigned int v[4], l, h; |
647 | 630 | ||
@@ -793,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn,
793 | #ifdef CONFIG_FLATMEM | 776 | #ifdef CONFIG_FLATMEM |
794 | max_mapnr = num_physpages; | 777 | max_mapnr = num_physpages; |
795 | #endif | 778 | #endif |
779 | __vmalloc_start_set = true; | ||
780 | |||
796 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | 781 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", |
797 | pages_to_mb(max_low_pfn)); | 782 | pages_to_mb(max_low_pfn)); |
798 | 783 | ||
@@ -814,176 +799,66 @@ static void __init zone_sizes_init(void)
814 | free_area_init_nodes(max_zone_pfns); | 799 | free_area_init_nodes(max_zone_pfns); |
815 | } | 800 | } |
816 | 801 | ||
802 | static unsigned long __init setup_node_bootmem(int nodeid, | ||
803 | unsigned long start_pfn, | ||
804 | unsigned long end_pfn, | ||
805 | unsigned long bootmap) | ||
806 | { | ||
807 | unsigned long bootmap_size; | ||
808 | |||
809 | /* don't touch min_low_pfn */ | ||
810 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | ||
811 | bootmap >> PAGE_SHIFT, | ||
812 | start_pfn, end_pfn); | ||
813 | printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", | ||
814 | nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
815 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | ||
816 | nodeid, bootmap, bootmap + bootmap_size); | ||
817 | free_bootmem_with_active_regions(nodeid, end_pfn); | ||
818 | early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
819 | |||
820 | return bootmap + bootmap_size; | ||
821 | } | ||
822 | |||
817 | void __init setup_bootmem_allocator(void) | 823 | void __init setup_bootmem_allocator(void) |
818 | { | 824 | { |
819 | int i; | 825 | int nodeid; |
820 | unsigned long bootmap_size, bootmap; | 826 | unsigned long bootmap_size, bootmap; |
821 | /* | 827 | /* |
822 | * Initialize the boot-time allocator (with low memory only): | 828 | * Initialize the boot-time allocator (with low memory only): |
823 | */ | 829 | */ |
824 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; | 830 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; |
825 | bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, | 831 | bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size, |
826 | max_pfn_mapped<<PAGE_SHIFT, bootmap_size, | ||
827 | PAGE_SIZE); | 832 | PAGE_SIZE); |
828 | if (bootmap == -1L) | 833 | if (bootmap == -1L) |
829 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 834 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
830 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 835 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
831 | 836 | ||
832 | /* don't touch min_low_pfn */ | ||
833 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | ||
834 | min_low_pfn, max_low_pfn); | ||
835 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 837 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
836 | max_pfn_mapped<<PAGE_SHIFT); | 838 | max_pfn_mapped<<PAGE_SHIFT); |
837 | printk(KERN_INFO " low ram: %08lx - %08lx\n", | 839 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
838 | min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); | ||
839 | printk(KERN_INFO " bootmap %08lx - %08lx\n", | ||
840 | bootmap, bootmap + bootmap_size); | ||
841 | for_each_online_node(i) | ||
842 | free_bootmem_with_active_regions(i, max_low_pfn); | ||
843 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
844 | |||
845 | after_init_bootmem = 1; | ||
846 | } | ||
847 | |||
848 | static void __init find_early_table_space(unsigned long end, int use_pse) | ||
849 | { | ||
850 | unsigned long puds, pmds, ptes, tables, start; | ||
851 | |||
852 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | ||
853 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | ||
854 | |||
855 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | ||
856 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | ||
857 | 840 | ||
858 | if (use_pse) { | 841 | for_each_online_node(nodeid) { |
859 | unsigned long extra; | 842 | unsigned long start_pfn, end_pfn; |
860 | |||
861 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | ||
862 | extra += PMD_SIZE; | ||
863 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
864 | } else | ||
865 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
866 | |||
867 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
868 | |||
869 | /* for fixmap */ | ||
870 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | ||
871 | |||
872 | /* | ||
873 | * RED-PEN putting page tables only on node 0 could | ||
874 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
875 | * need roughly 0.5KB per GB. | ||
876 | */ | ||
877 | start = 0x7000; | ||
878 | table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
879 | tables, PAGE_SIZE); | ||
880 | if (table_start == -1UL) | ||
881 | panic("Cannot find space for the kernel page tables"); | ||
882 | |||
883 | table_start >>= PAGE_SHIFT; | ||
884 | table_end = table_start; | ||
885 | table_top = table_start + (tables>>PAGE_SHIFT); | ||
886 | |||
887 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
888 | end, table_start << PAGE_SHIFT, | ||
889 | (table_start << PAGE_SHIFT) + tables); | ||
890 | } | ||
891 | 843 | ||
892 | unsigned long __init_refok init_memory_mapping(unsigned long start, | 844 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
893 | unsigned long end) | 845 | start_pfn = node_start_pfn[nodeid]; |
894 | { | 846 | end_pfn = node_end_pfn[nodeid]; |
895 | pgd_t *pgd_base = swapper_pg_dir; | 847 | if (start_pfn > max_low_pfn) |
896 | unsigned long start_pfn, end_pfn; | 848 | continue; |
897 | unsigned long big_page_start; | 849 | if (end_pfn > max_low_pfn) |
898 | #ifdef CONFIG_DEBUG_PAGEALLOC | 850 | end_pfn = max_low_pfn; |
899 | /* | ||
900 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
901 | * This will simplify cpa(), which otherwise needs to support splitting | ||
902 | * large pages into small in interrupt context, etc. | ||
903 | */ | ||
904 | int use_pse = 0; | ||
905 | #else | 851 | #else |
906 | int use_pse = cpu_has_pse; | 852 | start_pfn = 0; |
853 | end_pfn = max_low_pfn; | ||
907 | #endif | 854 | #endif |
908 | 855 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | |
909 | /* | 856 | bootmap); |
910 | * Find space for the kernel direct mapping tables. | ||
911 | */ | ||
912 | if (!after_init_bootmem) | ||
913 | find_early_table_space(end, use_pse); | ||
914 | |||
915 | #ifdef CONFIG_X86_PAE | ||
916 | set_nx(); | ||
917 | if (nx_enabled) | ||
918 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
919 | #endif | ||
920 | |||
921 | /* Enable PSE if available */ | ||
922 | if (cpu_has_pse) | ||
923 | set_in_cr4(X86_CR4_PSE); | ||
924 | |||
925 | /* Enable PGE if available */ | ||
926 | if (cpu_has_pge) { | ||
927 | set_in_cr4(X86_CR4_PGE); | ||
928 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
929 | } | ||
930 | |||
931 | /* | ||
932 | * Don't use a large page for the first 2/4MB of memory | ||
933 | * because there are often fixed size MTRRs in there | ||
934 | * and overlapping MTRRs into large pages can cause | ||
935 | * slowdowns. | ||
936 | */ | ||
937 | big_page_start = PMD_SIZE; | ||
938 | |||
939 | if (start < big_page_start) { | ||
940 | start_pfn = start >> PAGE_SHIFT; | ||
941 | end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); | ||
942 | } else { | ||
943 | /* head is not big page alignment ? */ | ||
944 | start_pfn = start >> PAGE_SHIFT; | ||
945 | end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
946 | << (PMD_SHIFT - PAGE_SHIFT); | ||
947 | } | ||
948 | if (start_pfn < end_pfn) | ||
949 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); | ||
950 | |||
951 | /* big page range */ | ||
952 | start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
953 | << (PMD_SHIFT - PAGE_SHIFT); | ||
954 | if (start_pfn < (big_page_start >> PAGE_SHIFT)) | ||
955 | start_pfn = big_page_start >> PAGE_SHIFT; | ||
956 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
957 | if (start_pfn < end_pfn) | ||
958 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, | ||
959 | use_pse); | ||
960 | |||
961 | /* tail is not big page alignment ? */ | ||
962 | start_pfn = end_pfn; | ||
963 | if (start_pfn > (big_page_start>>PAGE_SHIFT)) { | ||
964 | end_pfn = end >> PAGE_SHIFT; | ||
965 | if (start_pfn < end_pfn) | ||
966 | kernel_physical_mapping_init(pgd_base, start_pfn, | ||
967 | end_pfn, 0); | ||
968 | } | 857 | } |
969 | 858 | ||
970 | early_ioremap_page_table_range_init(pgd_base); | 859 | after_bootmem = 1; |
971 | |||
972 | load_cr3(swapper_pg_dir); | ||
973 | |||
974 | __flush_tlb_all(); | ||
975 | |||
976 | if (!after_init_bootmem) | ||
977 | reserve_early(table_start << PAGE_SHIFT, | ||
978 | table_end << PAGE_SHIFT, "PGTABLE"); | ||
979 | |||
980 | if (!after_init_bootmem) | ||
981 | early_memtest(start, end); | ||
982 | |||
983 | return end >> PAGE_SHIFT; | ||
984 | } | 860 | } |
985 | 861 | ||
986 | |||
987 | /* | 862 | /* |
988 | * paging_init() sets up the page tables - note that the first 8MB are | 863 | * paging_init() sets up the page tables - note that the first 8MB are |
989 | * already mapped by head.S. | 864 | * already mapped by head.S. |
@@ -1179,17 +1054,47 @@ static noinline int do_test_wp_bit(void)
1179 | const int rodata_test_data = 0xC3; | 1054 | const int rodata_test_data = 0xC3; |
1180 | EXPORT_SYMBOL_GPL(rodata_test_data); | 1055 | EXPORT_SYMBOL_GPL(rodata_test_data); |
1181 | 1056 | ||
1057 | static int kernel_set_to_readonly; | ||
1058 | |||
1059 | void set_kernel_text_rw(void) | ||
1060 | { | ||
1061 | unsigned long start = PFN_ALIGN(_text); | ||
1062 | unsigned long size = PFN_ALIGN(_etext) - start; | ||
1063 | |||
1064 | if (!kernel_set_to_readonly) | ||
1065 | return; | ||
1066 | |||
1067 | pr_debug("Set kernel text: %lx - %lx for read write\n", | ||
1068 | start, start+size); | ||
1069 | |||
1070 | set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); | ||
1071 | } | ||
1072 | |||
1073 | void set_kernel_text_ro(void) | ||
1074 | { | ||
1075 | unsigned long start = PFN_ALIGN(_text); | ||
1076 | unsigned long size = PFN_ALIGN(_etext) - start; | ||
1077 | |||
1078 | if (!kernel_set_to_readonly) | ||
1079 | return; | ||
1080 | |||
1081 | pr_debug("Set kernel text: %lx - %lx for read only\n", | ||
1082 | start, start+size); | ||
1083 | |||
1084 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | ||
1085 | } | ||
1086 | |||
1182 | void mark_rodata_ro(void) | 1087 | void mark_rodata_ro(void) |
1183 | { | 1088 | { |
1184 | unsigned long start = PFN_ALIGN(_text); | 1089 | unsigned long start = PFN_ALIGN(_text); |
1185 | unsigned long size = PFN_ALIGN(_etext) - start; | 1090 | unsigned long size = PFN_ALIGN(_etext) - start; |
1186 | 1091 | ||
1187 | #ifndef CONFIG_DYNAMIC_FTRACE | ||
1188 | /* Dynamic tracing modifies the kernel text section */ | ||
1189 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 1092 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
1190 | printk(KERN_INFO "Write protecting the kernel text: %luk\n", | 1093 | printk(KERN_INFO "Write protecting the kernel text: %luk\n", |
1191 | size >> 10); | 1094 | size >> 10); |
1192 | 1095 | ||
1096 | kernel_set_to_readonly = 1; | ||
1097 | |||
1193 | #ifdef CONFIG_CPA_DEBUG | 1098 | #ifdef CONFIG_CPA_DEBUG |
1194 | printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", | 1099 | printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", |
1195 | start, start+size); | 1100 | start, start+size); |
@@ -1198,7 +1103,6 @@ void mark_rodata_ro(void)
1198 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | 1103 | printk(KERN_INFO "Testing CPA: write protecting again\n"); |
1199 | set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); | 1104 | set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); |
1200 | #endif | 1105 | #endif |
1201 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
1202 | 1106 | ||
1203 | start += size; | 1107 | start += size; |
1204 | size = (unsigned long)__end_rodata - start; | 1108 | size = (unsigned long)__end_rodata - start; |
@@ -1217,13 +1121,6 @@ void mark_rodata_ro(void)
1217 | } | 1121 | } |
1218 | #endif | 1122 | #endif |
1219 | 1123 | ||
1220 | #ifdef CONFIG_BLK_DEV_INITRD | ||
1221 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
1222 | { | ||
1223 | free_init_pages("initrd memory", start, end); | ||
1224 | } | ||
1225 | #endif | ||
1226 | |||
1227 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | 1124 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, |
1228 | int flags) | 1125 | int flags) |
1229 | { | 1126 | { |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 07f44d491df1..1753e8020df6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,7 @@
48 | #include <asm/kdebug.h> | 48 | #include <asm/kdebug.h> |
49 | #include <asm/numa.h> | 49 | #include <asm/numa.h> |
50 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 54 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata;
61 | 62 | ||
62 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
63 | 64 | ||
64 | int direct_gbpages | ||
65 | #ifdef CONFIG_DIRECT_GBPAGES | ||
66 | = 1 | ||
67 | #endif | ||
68 | ; | ||
69 | |||
70 | static int __init parse_direct_gbpages_off(char *arg) | 65 | static int __init parse_direct_gbpages_off(char *arg) |
71 | { | 66 | { |
72 | direct_gbpages = 0; | 67 | direct_gbpages = 0; |
@@ -87,12 +82,10 @@ early_param("gbpages", parse_direct_gbpages_on);
87 | * around without checking the pgd every time. | 82 | * around without checking the pgd every time. |
88 | */ | 83 | */ |
89 | 84 | ||
90 | int after_bootmem; | ||
91 | |||
92 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; | 85 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; |
93 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 86 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
94 | 87 | ||
95 | static int do_not_nx __cpuinitdata; | 88 | static int disable_nx __cpuinitdata; |
96 | 89 | ||
97 | /* | 90 | /* |
98 | * noexec=on|off | 91 | * noexec=on|off |
@@ -107,9 +100,9 @@ static int __init nonx_setup(char *str)
107 | return -EINVAL; | 100 | return -EINVAL; |
108 | if (!strncmp(str, "on", 2)) { | 101 | if (!strncmp(str, "on", 2)) { |
109 | __supported_pte_mask |= _PAGE_NX; | 102 | __supported_pte_mask |= _PAGE_NX; |
110 | do_not_nx = 0; | 103 | disable_nx = 0; |
111 | } else if (!strncmp(str, "off", 3)) { | 104 | } else if (!strncmp(str, "off", 3)) { |
112 | do_not_nx = 1; | 105 | disable_nx = 1; |
113 | __supported_pte_mask &= ~_PAGE_NX; | 106 | __supported_pte_mask &= ~_PAGE_NX; |
114 | } | 107 | } |
115 | return 0; | 108 | return 0; |
@@ -121,7 +114,7 @@ void __cpuinit check_efer(void)
121 | unsigned long efer; | 114 | unsigned long efer; |
122 | 115 | ||
123 | rdmsrl(MSR_EFER, efer); | 116 | rdmsrl(MSR_EFER, efer); |
124 | if (!(efer & EFER_NX) || do_not_nx) | 117 | if (!(efer & EFER_NX) || disable_nx) |
125 | __supported_pte_mask &= ~_PAGE_NX; | 118 | __supported_pte_mask &= ~_PAGE_NX; |
126 | } | 119 | } |
127 | 120 | ||
@@ -325,13 +318,9 @@ void __init cleanup_highmap(void)
325 | } | 318 | } |
326 | } | 319 | } |
327 | 320 | ||
328 | static unsigned long __initdata table_start; | ||
329 | static unsigned long __meminitdata table_end; | ||
330 | static unsigned long __meminitdata table_top; | ||
331 | |||
332 | static __ref void *alloc_low_page(unsigned long *phys) | 321 | static __ref void *alloc_low_page(unsigned long *phys) |
333 | { | 322 | { |
334 | unsigned long pfn = table_end++; | 323 | unsigned long pfn = e820_table_end++; |
335 | void *adr; | 324 | void *adr; |
336 | 325 | ||
337 | if (after_bootmem) { | 326 | if (after_bootmem) { |
@@ -341,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
341 | return adr; | 330 | return adr; |
342 | } | 331 | } |
343 | 332 | ||
344 | if (pfn >= table_top) | 333 | if (pfn >= e820_table_top) |
345 | panic("alloc_low_page: ran out of memory"); | 334 | panic("alloc_low_page: ran out of memory"); |
346 | 335 | ||
347 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); | 336 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); |
@@ -581,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
581 | return phys_pud_init(pud, addr, end, page_size_mask); | 570 | return phys_pud_init(pud, addr, end, page_size_mask); |
582 | } | 571 | } |
583 | 572 | ||
584 | static void __init find_early_table_space(unsigned long end, int use_pse, | 573 | unsigned long __init |
585 | int use_gbpages) | 574 | kernel_physical_mapping_init(unsigned long start, |
586 | { | 575 | unsigned long end, |
587 | unsigned long puds, pmds, ptes, tables, start; | 576 | unsigned long page_size_mask) |
588 | |||
589 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | ||
590 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | ||
591 | if (use_gbpages) { | ||
592 | unsigned long extra; | ||
593 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); | ||
594 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; | ||
595 | } else | ||
596 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | ||
597 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | ||
598 | |||
599 | if (use_pse) { | ||
600 | unsigned long extra; | ||
601 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | ||
602 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
603 | } else | ||
604 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
605 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
606 | |||
607 | /* | ||
608 | * RED-PEN putting page tables only on node 0 could | ||
609 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
610 | * need roughly 0.5KB per GB. | ||
611 | */ | ||
612 | start = 0x8000; | ||
613 | table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
614 | if (table_start == -1UL) | ||
615 | panic("Cannot find space for the kernel page tables"); | ||
616 | |||
617 | table_start >>= PAGE_SHIFT; | ||
618 | table_end = table_start; | ||
619 | table_top = table_start + (tables >> PAGE_SHIFT); | ||
620 | |||
621 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
622 | end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); | ||
623 | } | ||
624 | |||
625 | static void __init init_gbpages(void) | ||
626 | { | ||
627 | if (direct_gbpages && cpu_has_gbpages) | ||
628 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
629 | else | ||
630 | direct_gbpages = 0; | ||
631 | } | ||
632 | |||
633 | static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, | ||
634 | unsigned long end, | ||
635 | unsigned long page_size_mask) | ||
636 | { | 577 | { |
637 | 578 | ||
638 | unsigned long next, last_map_addr = end; | 579 | unsigned long next, last_map_addr = end; |
@@ -669,176 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
669 | return last_map_addr; | 610 | return last_map_addr; |
670 | } | 611 | } |
671 | 612 | ||
672 | struct map_range { | ||
673 | unsigned long start; | ||
674 | unsigned long end; | ||
675 | unsigned page_size_mask; | ||
676 | }; | ||
677 | |||
678 | #define NR_RANGE_MR 5 | ||
679 | |||
680 | static int save_mr(struct map_range *mr, int nr_range, | ||
681 | unsigned long start_pfn, unsigned long end_pfn, | ||
682 | unsigned long page_size_mask) | ||
683 | { | ||
684 | |||
685 | if (start_pfn < end_pfn) { | ||
686 | if (nr_range >= NR_RANGE_MR) | ||
687 | panic("run out of range for init_memory_mapping\n"); | ||
688 | mr[nr_range].start = start_pfn<<PAGE_SHIFT; | ||
689 | mr[nr_range].end = end_pfn<<PAGE_SHIFT; | ||
690 | mr[nr_range].page_size_mask = page_size_mask; | ||
691 | nr_range++; | ||
692 | } | ||
693 | |||
694 | return nr_range; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | ||
699 | * This runs before bootmem is initialized and gets pages directly from | ||
700 | * the physical memory. To access them they are temporarily mapped. | ||
701 | */ | ||
702 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
703 | unsigned long end) | ||
704 | { | ||
705 | unsigned long last_map_addr = 0; | ||
706 | unsigned long page_size_mask = 0; | ||
707 | unsigned long start_pfn, end_pfn; | ||
708 | unsigned long pos; | ||
709 | |||
710 | struct map_range mr[NR_RANGE_MR]; | ||
711 | int nr_range, i; | ||
712 | int use_pse, use_gbpages; | ||
713 | |||
714 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); | ||
715 | |||
716 | /* | ||
717 | * Find space for the kernel direct mapping tables. | ||
718 | * | ||
719 | * Later we should allocate these tables in the local node of the | ||
720 | * memory mapped. Unfortunately this is done currently before the | ||
721 | * nodes are discovered. | ||
722 | */ | ||
723 | if (!after_bootmem) | ||
724 | init_gbpages(); | ||
725 | |||
726 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
727 | /* | ||
728 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
729 | * This will simplify cpa(), which otherwise needs to support splitting | ||
730 | * large pages into small in interrupt context, etc. | ||
731 | */ | ||
732 | use_pse = use_gbpages = 0; | ||
733 | #else | ||
734 | use_pse = cpu_has_pse; | ||
735 | use_gbpages = direct_gbpages; | ||
736 | #endif | ||
737 | |||
738 | if (use_gbpages) | ||
739 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
740 | if (use_pse) | ||
741 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
742 | |||
743 | memset(mr, 0, sizeof(mr)); | ||
744 | nr_range = 0; | ||
745 | |||
746 | /* head if not big page alignment ?*/ | ||
747 | start_pfn = start >> PAGE_SHIFT; | ||
748 | pos = start_pfn << PAGE_SHIFT; | ||
749 | end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) | ||
750 | << (PMD_SHIFT - PAGE_SHIFT); | ||
751 | if (end_pfn > (end >> PAGE_SHIFT)) | ||
752 | end_pfn = end >> PAGE_SHIFT; | ||
753 | if (start_pfn < end_pfn) { | ||
754 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
755 | pos = end_pfn << PAGE_SHIFT; | ||
756 | } | ||
757 | |||
758 | /* big page (2M) range*/ | ||
759 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
760 | << (PMD_SHIFT - PAGE_SHIFT); | ||
761 | end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
762 | << (PUD_SHIFT - PAGE_SHIFT); | ||
763 | if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) | ||
764 | end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); | ||
765 | if (start_pfn < end_pfn) { | ||
766 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
767 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
768 | pos = end_pfn << PAGE_SHIFT; | ||
769 | } | ||
770 | |||
771 | /* big page (1G) range */ | ||
772 | start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
773 | << (PUD_SHIFT - PAGE_SHIFT); | ||
774 | end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
775 | if (start_pfn < end_pfn) { | ||
776 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
777 | page_size_mask & | ||
778 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | ||
779 | pos = end_pfn << PAGE_SHIFT; | ||
780 | } | ||
781 | |||
782 | /* tail is not big page (1G) alignment */ | ||
783 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
784 | << (PMD_SHIFT - PAGE_SHIFT); | ||
785 | end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
786 | if (start_pfn < end_pfn) { | ||
787 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
788 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
789 | pos = end_pfn << PAGE_SHIFT; | ||
790 | } | ||
791 | |||
792 | /* tail is not big page (2M) alignment */ | ||
793 | start_pfn = pos>>PAGE_SHIFT; | ||
794 | end_pfn = end>>PAGE_SHIFT; | ||
795 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
796 | |||
797 | /* try to merge same page size and continuous */ | ||
798 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | ||
799 | unsigned long old_start; | ||
800 | if (mr[i].end != mr[i+1].start || | ||
801 | mr[i].page_size_mask != mr[i+1].page_size_mask) | ||
802 | continue; | ||
803 | /* move it */ | ||
804 | old_start = mr[i].start; | ||
805 | memmove(&mr[i], &mr[i+1], | ||
806 | (nr_range - 1 - i) * sizeof (struct map_range)); | ||
807 | mr[i--].start = old_start; | ||
808 | nr_range--; | ||
809 | } | ||
810 | |||
811 | for (i = 0; i < nr_range; i++) | ||
812 | printk(KERN_DEBUG " %010lx - %010lx page %s\n", | ||
813 | mr[i].start, mr[i].end, | ||
814 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | ||
815 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | ||
816 | |||
817 | if (!after_bootmem) | ||
818 | find_early_table_space(end, use_pse, use_gbpages); | ||
819 | |||
820 | for (i = 0; i < nr_range; i++) | ||
821 | last_map_addr = kernel_physical_mapping_init( | ||
822 | mr[i].start, mr[i].end, | ||
823 | mr[i].page_size_mask); | ||
824 | |||
825 | if (!after_bootmem) | ||
826 | mmu_cr4_features = read_cr4(); | ||
827 | __flush_tlb_all(); | ||
828 | |||
829 | if (!after_bootmem && table_end > table_start) | ||
830 | reserve_early(table_start << PAGE_SHIFT, | ||
831 | table_end << PAGE_SHIFT, "PGTABLE"); | ||
832 | |||
833 | printk(KERN_INFO "last_map_addr: %lx end: %lx\n", | ||
834 | last_map_addr, end); | ||
835 | |||
836 | if (!after_bootmem) | ||
837 | early_memtest(start, end); | ||
838 | |||
839 | return last_map_addr >> PAGE_SHIFT; | ||
840 | } | ||
841 | |||
842 | #ifndef CONFIG_NUMA | 613 | #ifndef CONFIG_NUMA |
843 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 614 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) |
844 | { | 615 | { |
@@ -910,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
910 | 681 | ||
911 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 682 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
912 | 683 | ||
913 | /* | ||
914 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
915 | * is valid. The argument is a physical page number. | ||
916 | * | ||
917 | * | ||
918 | * On x86, access has to be given to the first megabyte of ram because that area | ||
919 | * contains bios code and data regions used by X and dosemu and similar apps. | ||
920 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | ||
921 | * mmio resources as well as potential bios/acpi data regions. | ||
922 | */ | ||
923 | int devmem_is_allowed(unsigned long pagenr) | ||
924 | { | ||
925 | if (pagenr <= 256) | ||
926 | return 1; | ||
927 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
928 | return 0; | ||
929 | if (!page_is_ram(pagenr)) | ||
930 | return 1; | ||
931 | return 0; | ||
932 | } | ||
933 | |||
934 | |||
935 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, | 684 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, |
936 | kcore_modules, kcore_vsyscall; | 685 | kcore_modules, kcore_vsyscall; |
937 | 686 | ||
@@ -985,21 +734,48 @@ const int rodata_test_data = 0xC3;
985 | const int rodata_test_data = 0xC3; | 734 | const int rodata_test_data = 0xC3; |
986 | EXPORT_SYMBOL_GPL(rodata_test_data); | 735 | EXPORT_SYMBOL_GPL(rodata_test_data); |
987 | 736 | ||
737 | static int kernel_set_to_readonly; | ||
738 | |||
739 | void set_kernel_text_rw(void) | ||
740 | { | ||
741 | unsigned long start = PFN_ALIGN(_stext); | ||
742 | unsigned long end = PFN_ALIGN(__start_rodata); | ||
743 | |||
744 | if (!kernel_set_to_readonly) | ||
745 | return; | ||
746 | |||
747 | pr_debug("Set kernel text: %lx - %lx for read write\n", | ||
748 | start, end); | ||
749 | |||
750 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); | ||
751 | } | ||
752 | |||
753 | void set_kernel_text_ro(void) | ||
754 | { | ||
755 | unsigned long start = PFN_ALIGN(_stext); | ||
756 | unsigned long end = PFN_ALIGN(__start_rodata); | ||
757 | |||
758 | if (!kernel_set_to_readonly) | ||
759 | return; | ||
760 | |||
761 | pr_debug("Set kernel text: %lx - %lx for read only\n", | ||
762 | start, end); | ||
763 | |||
764 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); | ||
765 | } | ||
766 | |||
988 | void mark_rodata_ro(void) | 767 | void mark_rodata_ro(void) |
989 | { | 768 | { |
990 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | 769 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); |
991 | unsigned long rodata_start = | 770 | unsigned long rodata_start = |
992 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 771 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; |
993 | 772 | ||
994 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
995 | /* Dynamic tracing modifies the kernel text section */ | ||
996 | start = rodata_start; | ||
997 | #endif | ||
998 | |||
999 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 773 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
1000 | (end - start) >> 10); | 774 | (end - start) >> 10); |
1001 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); | 775 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); |
1002 | 776 | ||
777 | kernel_set_to_readonly = 1; | ||
778 | |||
1003 | /* | 779 | /* |
1004 | * The rodata section (but not the kernel text!) should also be | 780 | * The rodata section (but not the kernel text!) should also be |
1005 | * not-executable. | 781 | * not-executable. |
@@ -1019,13 +795,6 @@ void mark_rodata_ro(void) | |||
1019 | 795 | ||
1020 | #endif | 796 | #endif |
1021 | 797 | ||
1022 | #ifdef CONFIG_BLK_DEV_INITRD | ||
1023 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
1024 | { | ||
1025 | free_init_pages("initrd memory", start, end); | ||
1026 | } | ||
1027 | #endif | ||
1028 | |||
1029 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | 798 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, |
1030 | int flags) | 799 | int flags) |
1031 | { | 800 | { |
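The new set_kernel_text_rw()/set_kernel_text_ro() helpers deliberately do nothing until mark_rodata_ro() has set kernel_set_to_readonly, so boot-time code patching is unaffected. Below is a minimal sketch of how a text patcher (dynamic ftrace is the intended user) would bracket a modification with them; patch_one_site() and its arguments are hypothetical, the declaration header is assumed, and a real patcher would also handle cross-CPU instruction-cache coherency:

    #include <linux/string.h>
    #include <asm/cacheflush.h>	/* assumed home of set_kernel_text_rw()/_ro() */

    /* Hypothetical helper: shows only the rw/ro bracketing around a text
     * write; protection-wise it is a no-op before mark_rodata_ro() runs. */
    static void patch_one_site(void *site, const void *insn, size_t len)
    {
    	set_kernel_text_rw();
    	memcpy(site, insn, len);
    	set_kernel_text_ro();
    }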
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index b6a61f3d7ef8..fe6f84ca121e 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -19,10 +19,11 @@ | |||
19 | #include <asm/iomap.h> | 19 | #include <asm/iomap.h> |
20 | #include <asm/pat.h> | 20 | #include <asm/pat.h> |
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/highmem.h> | ||
22 | 23 | ||
23 | int is_io_mapping_possible(resource_size_t base, unsigned long size) | 24 | int is_io_mapping_possible(resource_size_t base, unsigned long size) |
24 | { | 25 | { |
25 | #ifndef CONFIG_X86_PAE | 26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) |
26 | /* There is no way to map greater than 1 << 32 address without PAE */ | 27 | /* There is no way to map greater than 1 << 32 address without PAE */ |
27 | if (base + size > 0x100000000ULL) | 28 | if (base + size > 0x100000000ULL) |
28 | return 0; | 29 | return 0; |
@@ -31,16 +32,28 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) | |||
31 | } | 32 | } |
32 | EXPORT_SYMBOL_GPL(is_io_mapping_possible); | 33 | EXPORT_SYMBOL_GPL(is_io_mapping_possible); |
33 | 34 | ||
34 | /* Map 'pfn' using fixed map 'type' and protections 'prot' | 35 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) |
35 | */ | ||
36 | void * | ||
37 | iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | ||
38 | { | 36 | { |
39 | enum fixed_addresses idx; | 37 | enum fixed_addresses idx; |
40 | unsigned long vaddr; | 38 | unsigned long vaddr; |
41 | 39 | ||
42 | pagefault_disable(); | 40 | pagefault_disable(); |
43 | 41 | ||
42 | debug_kmap_atomic(type); | ||
43 | idx = type + KM_TYPE_NR * smp_processor_id(); | ||
44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | ||
45 | set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); | ||
46 | arch_flush_lazy_mmu_mode(); | ||
47 | |||
48 | return (void *)vaddr; | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Map 'pfn' using fixed map 'type' and protections 'prot' | ||
53 | */ | ||
54 | void * | ||
55 | iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | ||
56 | { | ||
44 | /* | 57 | /* |
45 | * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. | 58 | * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. |
46 | * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the | 59 | * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the |
@@ -50,12 +63,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | |||
50 | if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) | 63 | if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) |
51 | prot = PAGE_KERNEL_UC_MINUS; | 64 | prot = PAGE_KERNEL_UC_MINUS; |
52 | 65 | ||
53 | idx = type + KM_TYPE_NR*smp_processor_id(); | 66 | return kmap_atomic_prot_pfn(pfn, type, prot); |
54 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | ||
55 | set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); | ||
56 | arch_flush_lazy_mmu_mode(); | ||
57 | |||
58 | return (void*) vaddr; | ||
59 | } | 67 | } |
60 | EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); | 68 | EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); |
61 | 69 | ||
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 433f7bd4648a..0dfa09d69e80 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -22,13 +22,17 @@ | |||
22 | #include <asm/pgalloc.h> | 22 | #include <asm/pgalloc.h> |
23 | #include <asm/pat.h> | 23 | #include <asm/pat.h> |
24 | 24 | ||
25 | #ifdef CONFIG_X86_64 | 25 | static inline int phys_addr_valid(resource_size_t addr) |
26 | |||
27 | static inline int phys_addr_valid(unsigned long addr) | ||
28 | { | 26 | { |
29 | return addr < (1UL << boot_cpu_data.x86_phys_bits); | 27 | #ifdef CONFIG_PHYS_ADDR_T_64BIT |
28 | return !(addr >> boot_cpu_data.x86_phys_bits); | ||
29 | #else | ||
30 | return 1; | ||
31 | #endif | ||
30 | } | 32 | } |
31 | 33 | ||
34 | #ifdef CONFIG_X86_64 | ||
35 | |||
32 | unsigned long __phys_addr(unsigned long x) | 36 | unsigned long __phys_addr(unsigned long x) |
33 | { | 37 | { |
34 | if (x >= __START_KERNEL_map) { | 38 | if (x >= __START_KERNEL_map) { |
@@ -38,8 +42,7 @@ unsigned long __phys_addr(unsigned long x) | |||
38 | } else { | 42 | } else { |
39 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 43 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); |
40 | x -= PAGE_OFFSET; | 44 | x -= PAGE_OFFSET; |
41 | VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : | 45 | VIRTUAL_BUG_ON(!phys_addr_valid(x)); |
42 | !phys_addr_valid(x)); | ||
43 | } | 46 | } |
44 | return x; | 47 | return x; |
45 | } | 48 | } |
@@ -56,10 +59,8 @@ bool __virt_addr_valid(unsigned long x) | |||
56 | if (x < PAGE_OFFSET) | 59 | if (x < PAGE_OFFSET) |
57 | return false; | 60 | return false; |
58 | x -= PAGE_OFFSET; | 61 | x -= PAGE_OFFSET; |
59 | if (system_state == SYSTEM_BOOTING ? | 62 | if (!phys_addr_valid(x)) |
60 | x > MAXMEM : !phys_addr_valid(x)) { | ||
61 | return false; | 63 | return false; |
62 | } | ||
63 | } | 64 | } |
64 | 65 | ||
65 | return pfn_valid(x >> PAGE_SHIFT); | 66 | return pfn_valid(x >> PAGE_SHIFT); |
@@ -68,18 +69,12 @@ EXPORT_SYMBOL(__virt_addr_valid); | |||
68 | 69 | ||
69 | #else | 70 | #else |
70 | 71 | ||
71 | static inline int phys_addr_valid(unsigned long addr) | ||
72 | { | ||
73 | return 1; | ||
74 | } | ||
75 | |||
76 | #ifdef CONFIG_DEBUG_VIRTUAL | 72 | #ifdef CONFIG_DEBUG_VIRTUAL |
77 | unsigned long __phys_addr(unsigned long x) | 73 | unsigned long __phys_addr(unsigned long x) |
78 | { | 74 | { |
79 | /* VMALLOC_* aren't constants; not available at the boot time */ | 75 | /* VMALLOC_* aren't constants */ |
80 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 76 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); |
81 | VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && | 77 | VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); |
82 | is_vmalloc_addr((void *) x)); | ||
83 | return x - PAGE_OFFSET; | 78 | return x - PAGE_OFFSET; |
84 | } | 79 | } |
85 | EXPORT_SYMBOL(__phys_addr); | 80 | EXPORT_SYMBOL(__phys_addr); |
@@ -89,7 +84,9 @@ bool __virt_addr_valid(unsigned long x) | |||
89 | { | 84 | { |
90 | if (x < PAGE_OFFSET) | 85 | if (x < PAGE_OFFSET) |
91 | return false; | 86 | return false; |
92 | if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) | 87 | if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) |
88 | return false; | ||
89 | if (x >= FIXADDR_START) | ||
93 | return false; | 90 | return false; |
94 | return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); | 91 | return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); |
95 | } | 92 | } |
@@ -508,13 +505,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) | |||
508 | return &bm_pte[pte_index(addr)]; | 505 | return &bm_pte[pte_index(addr)]; |
509 | } | 506 | } |
510 | 507 | ||
508 | static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; | ||
509 | |||
511 | void __init early_ioremap_init(void) | 510 | void __init early_ioremap_init(void) |
512 | { | 511 | { |
513 | pmd_t *pmd; | 512 | pmd_t *pmd; |
513 | int i; | ||
514 | 514 | ||
515 | if (early_ioremap_debug) | 515 | if (early_ioremap_debug) |
516 | printk(KERN_INFO "early_ioremap_init()\n"); | 516 | printk(KERN_INFO "early_ioremap_init()\n"); |
517 | 517 | ||
518 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) | ||
519 | slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); | ||
520 | |||
518 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); | 521 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); |
519 | memset(bm_pte, 0, sizeof(bm_pte)); | 522 | memset(bm_pte, 0, sizeof(bm_pte)); |
520 | pmd_populate_kernel(&init_mm, pmd, bm_pte); | 523 | pmd_populate_kernel(&init_mm, pmd, bm_pte); |
@@ -581,6 +584,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) | |||
581 | 584 | ||
582 | static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; | 585 | static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; |
583 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; | 586 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; |
587 | |||
584 | static int __init check_early_ioremap_leak(void) | 588 | static int __init check_early_ioremap_leak(void) |
585 | { | 589 | { |
586 | int count = 0; | 590 | int count = 0; |
@@ -602,7 +606,8 @@ static int __init check_early_ioremap_leak(void) | |||
602 | } | 606 | } |
603 | late_initcall(check_early_ioremap_leak); | 607 | late_initcall(check_early_ioremap_leak); |
604 | 608 | ||
605 | static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) | 609 | static void __init __iomem * |
610 | __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) | ||
606 | { | 611 | { |
607 | unsigned long offset, last_addr; | 612 | unsigned long offset, last_addr; |
608 | unsigned int nrpages; | 613 | unsigned int nrpages; |
@@ -668,9 +673,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo | |||
668 | --nrpages; | 673 | --nrpages; |
669 | } | 674 | } |
670 | if (early_ioremap_debug) | 675 | if (early_ioremap_debug) |
671 | printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); | 676 | printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); |
672 | 677 | ||
673 | prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); | 678 | prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); |
674 | return prev_map[slot]; | 679 | return prev_map[slot]; |
675 | } | 680 | } |
676 | 681 | ||
@@ -738,8 +743,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) | |||
738 | } | 743 | } |
739 | prev_map[slot] = NULL; | 744 | prev_map[slot] = NULL; |
740 | } | 745 | } |
741 | |||
742 | void __this_fixmap_does_not_exist(void) | ||
743 | { | ||
744 | WARN_ON(1); | ||
745 | } | ||
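slot_virt[] caches the virtual base of each early-ioremap slot at init time, so __early_ioremap() no longer evaluates fix_to_virt() with a non-constant index, which is exactly what the removed __this_fixmap_does_not_exist() link-time check used to trap. Use of the early mapping path itself is unchanged; a sketch, where dump_fw_table() and its arguments are hypothetical:

    #include <linux/kernel.h>
    #include <linux/io.h>

    /* Sketch: inspect a firmware table before the normal ioremap()
     * machinery is available; 'phys'/'len' would come from e820/ACPI. */
    static void __init dump_fw_table(unsigned long phys, unsigned long len)
    {
    	void __iomem *p = early_ioremap(phys, len);

    	if (!p)
    		return;
    	print_hex_dump(KERN_DEBUG, "fw: ", DUMP_PREFIX_OFFSET,
    		       16, 1, (void __force *)p, len, false);
    	early_iounmap(p, len);
    }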
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 6a518dd08a36..4f115e00486b 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
310 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); | 310 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); |
311 | 311 | ||
312 | if (!ctx->active) { | 312 | if (!ctx->active) { |
313 | pr_warning("kmmio: spurious debug trap on CPU %d.\n", | 313 | pr_debug("kmmio: spurious debug trap on CPU %d.\n", |
314 | smp_processor_id()); | 314 | smp_processor_id()); |
315 | goto out; | 315 | goto out; |
316 | } | 316 | } |
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 0bcd7883d036..605c8be06217 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg) | |||
100 | { | 100 | { |
101 | if (arg) | 101 | if (arg) |
102 | memtest_pattern = simple_strtoul(arg, NULL, 0); | 102 | memtest_pattern = simple_strtoul(arg, NULL, 0); |
103 | else | ||
104 | memtest_pattern = ARRAY_SIZE(patterns); | ||
105 | |||
103 | return 0; | 106 | return 0; |
104 | } | 107 | } |
105 | 108 | ||
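With the added else branch, a bare "memtest" on the kernel command line now selects every entry in patterns[] via ARRAY_SIZE() instead of leaving memtest_pattern at 0 (disabled). Illustrative boot parameters:

    memtest        # scan with all patterns in patterns[]
    memtest=4      # scan with the first four patterns only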
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 2c4baa88f2cb..c9342ed8b402 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -378,27 +378,34 @@ static void clear_trace_list(void) | |||
378 | } | 378 | } |
379 | 379 | ||
380 | #ifdef CONFIG_HOTPLUG_CPU | 380 | #ifdef CONFIG_HOTPLUG_CPU |
381 | static cpumask_t downed_cpus; | 381 | static cpumask_var_t downed_cpus; |
382 | 382 | ||
383 | static void enter_uniprocessor(void) | 383 | static void enter_uniprocessor(void) |
384 | { | 384 | { |
385 | int cpu; | 385 | int cpu; |
386 | int err; | 386 | int err; |
387 | 387 | ||
388 | if (downed_cpus == NULL && | ||
389 | !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { | ||
390 | pr_notice(NAME "Failed to allocate mask\n"); | ||
391 | goto out; | ||
392 | } | ||
393 | |||
388 | get_online_cpus(); | 394 | get_online_cpus(); |
389 | downed_cpus = cpu_online_map; | 395 | cpumask_copy(downed_cpus, cpu_online_mask); |
390 | cpu_clear(first_cpu(cpu_online_map), downed_cpus); | 396 | cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); |
391 | if (num_online_cpus() > 1) | 397 | if (num_online_cpus() > 1) |
392 | pr_notice(NAME "Disabling non-boot CPUs...\n"); | 398 | pr_notice(NAME "Disabling non-boot CPUs...\n"); |
393 | put_online_cpus(); | 399 | put_online_cpus(); |
394 | 400 | ||
395 | for_each_cpu_mask(cpu, downed_cpus) { | 401 | for_each_cpu(cpu, downed_cpus) { |
396 | err = cpu_down(cpu); | 402 | err = cpu_down(cpu); |
397 | if (!err) | 403 | if (!err) |
398 | pr_info(NAME "CPU%d is down.\n", cpu); | 404 | pr_info(NAME "CPU%d is down.\n", cpu); |
399 | else | 405 | else |
400 | pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); | 406 | pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); |
401 | } | 407 | } |
408 | out: | ||
402 | if (num_online_cpus() > 1) | 409 | if (num_online_cpus() > 1) |
403 | pr_warning(NAME "multiple CPUs still online, " | 410 | pr_warning(NAME "multiple CPUs still online, " |
404 | "may miss events.\n"); | 411 | "may miss events.\n"); |
@@ -411,10 +418,10 @@ static void __ref leave_uniprocessor(void) | |||
411 | int cpu; | 418 | int cpu; |
412 | int err; | 419 | int err; |
413 | 420 | ||
414 | if (cpus_weight(downed_cpus) == 0) | 421 | if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) |
415 | return; | 422 | return; |
416 | pr_notice(NAME "Re-enabling CPUs...\n"); | 423 | pr_notice(NAME "Re-enabling CPUs...\n"); |
417 | for_each_cpu_mask(cpu, downed_cpus) { | 424 | for_each_cpu(cpu, downed_cpus) { |
418 | err = cpu_up(cpu); | 425 | err = cpu_up(cpu); |
419 | if (!err) | 426 | if (!err) |
420 | pr_info(NAME "enabled CPU%d.\n", cpu); | 427 | pr_info(NAME "enabled CPU%d.\n", cpu); |
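Converting downed_cpus from a static cpumask_t to cpumask_var_t keeps the mask out of static storage when CONFIG_CPUMASK_OFFSTACK=y; it is allocated lazily on first use, and the hotplug dance is simply skipped (with a notice) if that allocation fails. The general idiom, shown on a hypothetical snapshot_online_cpus() helper:

    #include <linux/cpumask.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>

    /* Sketch of the cpumask_var_t idiom used above: allocate once, copy
     * the online mask, iterate with for_each_cpu(), free when done. */
    static int snapshot_online_cpus(void)
    {
    	cpumask_var_t snap;
    	int cpu;

    	if (!alloc_cpumask_var(&snap, GFP_KERNEL))
    		return -ENOMEM;

    	cpumask_copy(snap, cpu_online_mask);
    	for_each_cpu(cpu, snap)
    		pr_info("cpu%d was online\n", cpu);

    	free_cpumask_var(snap);
    	return 0;
    }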
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c new file mode 100644 index 000000000000..550df481accd --- /dev/null +++ b/arch/x86/mm/numa.c | |||
@@ -0,0 +1,67 @@ | |||
1 | /* Common code for 32 and 64-bit NUMA */ | ||
2 | #include <linux/topology.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/bootmem.h> | ||
5 | |||
6 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
7 | # define DBG(x...) printk(KERN_DEBUG x) | ||
8 | #else | ||
9 | # define DBG(x...) | ||
10 | #endif | ||
11 | |||
12 | /* | ||
13 | * Which logical CPUs are on which nodes | ||
14 | */ | ||
15 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | ||
16 | EXPORT_SYMBOL(node_to_cpumask_map); | ||
17 | |||
18 | /* | ||
19 | * Allocate node_to_cpumask_map based on number of available nodes | ||
20 | * Requires node_possible_map to be valid. | ||
21 | * | ||
22 | * Note: node_to_cpumask() is not valid until after this is done. | ||
23 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) | ||
24 | */ | ||
25 | void __init setup_node_to_cpumask_map(void) | ||
26 | { | ||
27 | unsigned int node, num = 0; | ||
28 | |||
29 | /* setup nr_node_ids if not done yet */ | ||
30 | if (nr_node_ids == MAX_NUMNODES) { | ||
31 | for_each_node_mask(node, node_possible_map) | ||
32 | num = node; | ||
33 | nr_node_ids = num + 1; | ||
34 | } | ||
35 | |||
36 | /* allocate the map */ | ||
37 | for (node = 0; node < nr_node_ids; node++) | ||
38 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); | ||
39 | |||
40 | /* cpumask_of_node() will now work */ | ||
41 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); | ||
42 | } | ||
43 | |||
44 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
45 | /* | ||
46 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | ||
47 | */ | ||
48 | const struct cpumask *cpumask_of_node(int node) | ||
49 | { | ||
50 | if (node >= nr_node_ids) { | ||
51 | printk(KERN_WARNING | ||
52 | "cpumask_of_node(%d): node > nr_node_ids(%d)\n", | ||
53 | node, nr_node_ids); | ||
54 | dump_stack(); | ||
55 | return cpu_none_mask; | ||
56 | } | ||
57 | if (node_to_cpumask_map[node] == NULL) { | ||
58 | printk(KERN_WARNING | ||
59 | "cpumask_of_node(%d): no node_to_cpumask_map!\n", | ||
60 | node); | ||
61 | dump_stack(); | ||
62 | return cpu_online_mask; | ||
63 | } | ||
64 | return node_to_cpumask_map[node]; | ||
65 | } | ||
66 | EXPORT_SYMBOL(cpumask_of_node); | ||
67 | #endif | ||
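The new common numa.c owns node_to_cpumask_map[] as an array of per-node cpumask_var_t allocated from bootmem, and under CONFIG_DEBUG_PER_CPU_MAPS cpumask_of_node() returns a const struct cpumask * with safe fallbacks for bogus or not-yet-initialised nodes. A sketch of a consumer, assuming setup_node_to_cpumask_map() has already run (report_cpus_per_node() is hypothetical):

    #include <linux/kernel.h>
    #include <linux/nodemask.h>
    #include <linux/topology.h>

    /* Sketch: report how many CPUs sit on each possible node, going
     * through the per-node cpumask map set up above. */
    static void __init report_cpus_per_node(void)
    {
    	int node;

    	for_each_node_mask(node, node_possible_map)
    		pr_info("node %d: %d cpus\n", node,
    			cpumask_weight(cpumask_of_node(node)));
    }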
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 451fe95a0352..3daefa04ace5 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn, | |||
416 | for_each_online_node(nid) | 416 | for_each_online_node(nid) |
417 | propagate_e820_map_node(nid); | 417 | propagate_e820_map_node(nid); |
418 | 418 | ||
419 | for_each_online_node(nid) | 419 | for_each_online_node(nid) { |
420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
421 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | ||
422 | } | ||
421 | 423 | ||
422 | NODE_DATA(0)->bdata = &bootmem_node_data[0]; | ||
423 | setup_bootmem_allocator(); | 424 | setup_bootmem_allocator(); |
424 | } | 425 | } |
425 | 426 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 64c9cf043cdd..d73aaa892371 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -20,12 +20,6 @@ | |||
20 | #include <asm/acpi.h> | 20 | #include <asm/acpi.h> |
21 | #include <asm/k8.h> | 21 | #include <asm/k8.h> |
22 | 22 | ||
23 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
24 | # define DBG(x...) printk(KERN_DEBUG x) | ||
25 | #else | ||
26 | # define DBG(x...) | ||
27 | #endif | ||
28 | |||
29 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
30 | EXPORT_SYMBOL(node_data); | 24 | EXPORT_SYMBOL(node_data); |
31 | 25 | ||
@@ -49,12 +43,6 @@ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | |||
49 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | 43 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); |
50 | 44 | ||
51 | /* | 45 | /* |
52 | * Which logical CPUs are on which nodes | ||
53 | */ | ||
54 | cpumask_t *node_to_cpumask_map; | ||
55 | EXPORT_SYMBOL(node_to_cpumask_map); | ||
56 | |||
57 | /* | ||
58 | * Given a shift value, try to populate memnodemap[] | 46 | * Given a shift value, try to populate memnodemap[] |
59 | * Returns : | 47 | * Returns : |
60 | * 1 if OK | 48 | * 1 if OK |
@@ -661,36 +649,6 @@ void __init init_cpu_to_node(void) | |||
661 | #endif | 649 | #endif |
662 | 650 | ||
663 | 651 | ||
664 | /* | ||
665 | * Allocate node_to_cpumask_map based on number of available nodes | ||
666 | * Requires node_possible_map to be valid. | ||
667 | * | ||
668 | * Note: node_to_cpumask() is not valid until after this is done. | ||
669 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) | ||
670 | */ | ||
671 | void __init setup_node_to_cpumask_map(void) | ||
672 | { | ||
673 | unsigned int node, num = 0; | ||
674 | cpumask_t *map; | ||
675 | |||
676 | /* setup nr_node_ids if not done yet */ | ||
677 | if (nr_node_ids == MAX_NUMNODES) { | ||
678 | for_each_node_mask(node, node_possible_map) | ||
679 | num = node; | ||
680 | nr_node_ids = num + 1; | ||
681 | } | ||
682 | |||
683 | /* allocate the map */ | ||
684 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | ||
685 | DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); | ||
686 | |||
687 | pr_debug("Node to cpumask map at %p for %d nodes\n", | ||
688 | map, nr_node_ids); | ||
689 | |||
690 | /* node_to_cpumask() will now work */ | ||
691 | node_to_cpumask_map = map; | ||
692 | } | ||
693 | |||
694 | void __cpuinit numa_set_node(int cpu, int node) | 652 | void __cpuinit numa_set_node(int cpu, int node) |
695 | { | 653 | { |
696 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | 654 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); |
@@ -723,12 +681,12 @@ void __cpuinit numa_clear_node(int cpu) | |||
723 | 681 | ||
724 | void __cpuinit numa_add_cpu(int cpu) | 682 | void __cpuinit numa_add_cpu(int cpu) |
725 | { | 683 | { |
726 | cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 684 | cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); |
727 | } | 685 | } |
728 | 686 | ||
729 | void __cpuinit numa_remove_cpu(int cpu) | 687 | void __cpuinit numa_remove_cpu(int cpu) |
730 | { | 688 | { |
731 | cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 689 | cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); |
732 | } | 690 | } |
733 | 691 | ||
734 | #else /* CONFIG_DEBUG_PER_CPU_MAPS */ | 692 | #else /* CONFIG_DEBUG_PER_CPU_MAPS */ |
@@ -739,20 +697,20 @@ void __cpuinit numa_remove_cpu(int cpu) | |||
739 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | 697 | static void __cpuinit numa_set_cpumask(int cpu, int enable) |
740 | { | 698 | { |
741 | int node = early_cpu_to_node(cpu); | 699 | int node = early_cpu_to_node(cpu); |
742 | cpumask_t *mask; | 700 | struct cpumask *mask; |
743 | char buf[64]; | 701 | char buf[64]; |
744 | 702 | ||
745 | if (node_to_cpumask_map == NULL) { | 703 | mask = node_to_cpumask_map[node]; |
746 | printk(KERN_ERR "node_to_cpumask_map NULL\n"); | 704 | if (mask == NULL) { |
705 | printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node); | ||
747 | dump_stack(); | 706 | dump_stack(); |
748 | return; | 707 | return; |
749 | } | 708 | } |
750 | 709 | ||
751 | mask = &node_to_cpumask_map[node]; | ||
752 | if (enable) | 710 | if (enable) |
753 | cpu_set(cpu, *mask); | 711 | cpumask_set_cpu(cpu, mask); |
754 | else | 712 | else |
755 | cpu_clear(cpu, *mask); | 713 | cpumask_clear_cpu(cpu, mask); |
756 | 714 | ||
757 | cpulist_scnprintf(buf, sizeof(buf), mask); | 715 | cpulist_scnprintf(buf, sizeof(buf), mask); |
758 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | 716 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", |
@@ -799,59 +757,6 @@ int early_cpu_to_node(int cpu) | |||
799 | return per_cpu(x86_cpu_to_node_map, cpu); | 757 | return per_cpu(x86_cpu_to_node_map, cpu); |
800 | } | 758 | } |
801 | 759 | ||
802 | |||
803 | /* empty cpumask */ | ||
804 | static const cpumask_t cpu_mask_none; | ||
805 | |||
806 | /* | ||
807 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | ||
808 | */ | ||
809 | const cpumask_t *cpumask_of_node(int node) | ||
810 | { | ||
811 | if (node_to_cpumask_map == NULL) { | ||
812 | printk(KERN_WARNING | ||
813 | "cpumask_of_node(%d): no node_to_cpumask_map!\n", | ||
814 | node); | ||
815 | dump_stack(); | ||
816 | return (const cpumask_t *)&cpu_online_map; | ||
817 | } | ||
818 | if (node >= nr_node_ids) { | ||
819 | printk(KERN_WARNING | ||
820 | "cpumask_of_node(%d): node > nr_node_ids(%d)\n", | ||
821 | node, nr_node_ids); | ||
822 | dump_stack(); | ||
823 | return &cpu_mask_none; | ||
824 | } | ||
825 | return &node_to_cpumask_map[node]; | ||
826 | } | ||
827 | EXPORT_SYMBOL(cpumask_of_node); | ||
828 | |||
829 | /* | ||
830 | * Returns a bitmask of CPUs on Node 'node'. | ||
831 | * | ||
832 | * Side note: this function creates the returned cpumask on the stack | ||
833 | * so with a high NR_CPUS count, excessive stack space is used. The | ||
834 | * node_to_cpumask_ptr function should be used whenever possible. | ||
835 | */ | ||
836 | cpumask_t node_to_cpumask(int node) | ||
837 | { | ||
838 | if (node_to_cpumask_map == NULL) { | ||
839 | printk(KERN_WARNING | ||
840 | "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); | ||
841 | dump_stack(); | ||
842 | return cpu_online_map; | ||
843 | } | ||
844 | if (node >= nr_node_ids) { | ||
845 | printk(KERN_WARNING | ||
846 | "node_to_cpumask(%d): node > nr_node_ids(%d)\n", | ||
847 | node, nr_node_ids); | ||
848 | dump_stack(); | ||
849 | return cpu_mask_none; | ||
850 | } | ||
851 | return node_to_cpumask_map[node]; | ||
852 | } | ||
853 | EXPORT_SYMBOL(node_to_cpumask); | ||
854 | |||
855 | /* | 760 | /* |
856 | * --------- end of debug versions of the numa functions --------- | 761 | * --------- end of debug versions of the numa functions --------- |
857 | */ | 762 | */ |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9015e5e412b5..660cac75ae11 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
17 | #include <asm/tlbflush.h> | 17 | #include <asm/tlbflush.h> |
18 | #include <asm/sections.h> | 18 | #include <asm/sections.h> |
19 | #include <asm/setup.h> | ||
19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
20 | #include <asm/pgalloc.h> | 21 | #include <asm/pgalloc.h> |
21 | #include <asm/proto.h> | 22 | #include <asm/proto.h> |
@@ -33,6 +34,7 @@ struct cpa_data { | |||
33 | unsigned long pfn; | 34 | unsigned long pfn; |
34 | unsigned force_split : 1; | 35 | unsigned force_split : 1; |
35 | int curpage; | 36 | int curpage; |
37 | struct page **pages; | ||
36 | }; | 38 | }; |
37 | 39 | ||
38 | /* | 40 | /* |
@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock); | |||
45 | 47 | ||
46 | #define CPA_FLUSHTLB 1 | 48 | #define CPA_FLUSHTLB 1 |
47 | #define CPA_ARRAY 2 | 49 | #define CPA_ARRAY 2 |
50 | #define CPA_PAGES_ARRAY 4 | ||
48 | 51 | ||
49 | #ifdef CONFIG_PROC_FS | 52 | #ifdef CONFIG_PROC_FS |
50 | static unsigned long direct_pages_count[PG_LEVEL_NUM]; | 53 | static unsigned long direct_pages_count[PG_LEVEL_NUM]; |
@@ -95,7 +98,7 @@ static inline unsigned long highmap_start_pfn(void) | |||
95 | 98 | ||
96 | static inline unsigned long highmap_end_pfn(void) | 99 | static inline unsigned long highmap_end_pfn(void) |
97 | { | 100 | { |
98 | return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; | 101 | return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; |
99 | } | 102 | } |
100 | 103 | ||
101 | #endif | 104 | #endif |
@@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) | |||
201 | } | 204 | } |
202 | } | 205 | } |
203 | 206 | ||
204 | static void cpa_flush_array(unsigned long *start, int numpages, int cache) | 207 | static void cpa_flush_array(unsigned long *start, int numpages, int cache, |
208 | int in_flags, struct page **pages) | ||
205 | { | 209 | { |
206 | unsigned int i, level; | 210 | unsigned int i, level; |
207 | unsigned long *addr; | ||
208 | 211 | ||
209 | BUG_ON(irqs_disabled()); | 212 | BUG_ON(irqs_disabled()); |
210 | 213 | ||
@@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache) | |||
225 | * will cause all other CPUs to flush the same | 228 | * will cause all other CPUs to flush the same |
226 | * cachelines: | 229 | * cachelines: |
227 | */ | 230 | */ |
228 | for (i = 0, addr = start; i < numpages; i++, addr++) { | 231 | for (i = 0; i < numpages; i++) { |
229 | pte_t *pte = lookup_address(*addr, &level); | 232 | unsigned long addr; |
233 | pte_t *pte; | ||
234 | |||
235 | if (in_flags & CPA_PAGES_ARRAY) | ||
236 | addr = (unsigned long)page_address(pages[i]); | ||
237 | else | ||
238 | addr = start[i]; | ||
239 | |||
240 | pte = lookup_address(addr, &level); | ||
230 | 241 | ||
231 | /* | 242 | /* |
232 | * Only flush present addresses: | 243 | * Only flush present addresses: |
233 | */ | 244 | */ |
234 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) | 245 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) |
235 | clflush_cache_range((void *) *addr, PAGE_SIZE); | 246 | clflush_cache_range((void *)addr, PAGE_SIZE); |
236 | } | 247 | } |
237 | } | 248 | } |
238 | 249 | ||
@@ -584,7 +595,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) | |||
584 | unsigned int level; | 595 | unsigned int level; |
585 | pte_t *kpte, old_pte; | 596 | pte_t *kpte, old_pte; |
586 | 597 | ||
587 | if (cpa->flags & CPA_ARRAY) | 598 | if (cpa->flags & CPA_PAGES_ARRAY) |
599 | address = (unsigned long)page_address(cpa->pages[cpa->curpage]); | ||
600 | else if (cpa->flags & CPA_ARRAY) | ||
588 | address = cpa->vaddr[cpa->curpage]; | 601 | address = cpa->vaddr[cpa->curpage]; |
589 | else | 602 | else |
590 | address = *cpa->vaddr; | 603 | address = *cpa->vaddr; |
@@ -687,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
687 | * No need to redo, when the primary call touched the direct | 700 | * No need to redo, when the primary call touched the direct |
688 | * mapping already: | 701 | * mapping already: |
689 | */ | 702 | */ |
690 | if (cpa->flags & CPA_ARRAY) | 703 | if (cpa->flags & CPA_PAGES_ARRAY) |
704 | vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); | ||
705 | else if (cpa->flags & CPA_ARRAY) | ||
691 | vaddr = cpa->vaddr[cpa->curpage]; | 706 | vaddr = cpa->vaddr[cpa->curpage]; |
692 | else | 707 | else |
693 | vaddr = *cpa->vaddr; | 708 | vaddr = *cpa->vaddr; |
@@ -698,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
698 | alias_cpa = *cpa; | 713 | alias_cpa = *cpa; |
699 | temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | 714 | temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); |
700 | alias_cpa.vaddr = &temp_cpa_vaddr; | 715 | alias_cpa.vaddr = &temp_cpa_vaddr; |
701 | alias_cpa.flags &= ~CPA_ARRAY; | 716 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); |
702 | 717 | ||
703 | 718 | ||
704 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | 719 | ret = __change_page_attr_set_clr(&alias_cpa, 0); |
@@ -711,7 +726,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
711 | * No need to redo, when the primary call touched the high | 726 | * No need to redo, when the primary call touched the high |
712 | * mapping already: | 727 | * mapping already: |
713 | */ | 728 | */ |
714 | if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) | 729 | if (within(vaddr, (unsigned long) _text, _brk_end)) |
715 | return 0; | 730 | return 0; |
716 | 731 | ||
717 | /* | 732 | /* |
@@ -724,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
724 | alias_cpa = *cpa; | 739 | alias_cpa = *cpa; |
725 | temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; | 740 | temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; |
726 | alias_cpa.vaddr = &temp_cpa_vaddr; | 741 | alias_cpa.vaddr = &temp_cpa_vaddr; |
727 | alias_cpa.flags &= ~CPA_ARRAY; | 742 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); |
728 | 743 | ||
729 | /* | 744 | /* |
730 | * The high mapping range is imprecise, so ignore the return value. | 745 | * The high mapping range is imprecise, so ignore the return value. |
@@ -745,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
745 | */ | 760 | */ |
746 | cpa->numpages = numpages; | 761 | cpa->numpages = numpages; |
747 | /* for array changes, we can't use large page */ | 762 | /* for array changes, we can't use large page */ |
748 | if (cpa->flags & CPA_ARRAY) | 763 | if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) |
749 | cpa->numpages = 1; | 764 | cpa->numpages = 1; |
750 | 765 | ||
751 | if (!debug_pagealloc) | 766 | if (!debug_pagealloc) |
@@ -769,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
769 | */ | 784 | */ |
770 | BUG_ON(cpa->numpages > numpages); | 785 | BUG_ON(cpa->numpages > numpages); |
771 | numpages -= cpa->numpages; | 786 | numpages -= cpa->numpages; |
772 | if (cpa->flags & CPA_ARRAY) | 787 | if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) |
773 | cpa->curpage++; | 788 | cpa->curpage++; |
774 | else | 789 | else |
775 | *cpa->vaddr += cpa->numpages * PAGE_SIZE; | 790 | *cpa->vaddr += cpa->numpages * PAGE_SIZE; |
@@ -786,7 +801,8 @@ static inline int cache_attr(pgprot_t attr) | |||
786 | 801 | ||
787 | static int change_page_attr_set_clr(unsigned long *addr, int numpages, | 802 | static int change_page_attr_set_clr(unsigned long *addr, int numpages, |
788 | pgprot_t mask_set, pgprot_t mask_clr, | 803 | pgprot_t mask_set, pgprot_t mask_clr, |
789 | int force_split, int array) | 804 | int force_split, int in_flag, |
805 | struct page **pages) | ||
790 | { | 806 | { |
791 | struct cpa_data cpa; | 807 | struct cpa_data cpa; |
792 | int ret, cache, checkalias; | 808 | int ret, cache, checkalias; |
@@ -801,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
801 | return 0; | 817 | return 0; |
802 | 818 | ||
803 | /* Ensure we are PAGE_SIZE aligned */ | 819 | /* Ensure we are PAGE_SIZE aligned */ |
804 | if (!array) { | 820 | if (in_flag & CPA_ARRAY) { |
805 | if (*addr & ~PAGE_MASK) { | ||
806 | *addr &= PAGE_MASK; | ||
807 | /* | ||
808 | * People should not be passing in unaligned addresses: | ||
809 | */ | ||
810 | WARN_ON_ONCE(1); | ||
811 | } | ||
812 | } else { | ||
813 | int i; | 821 | int i; |
814 | for (i = 0; i < numpages; i++) { | 822 | for (i = 0; i < numpages; i++) { |
815 | if (addr[i] & ~PAGE_MASK) { | 823 | if (addr[i] & ~PAGE_MASK) { |
@@ -817,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
817 | WARN_ON_ONCE(1); | 825 | WARN_ON_ONCE(1); |
818 | } | 826 | } |
819 | } | 827 | } |
828 | } else if (!(in_flag & CPA_PAGES_ARRAY)) { | ||
829 | /* | ||
830 | * in_flag of CPA_PAGES_ARRAY implies it is aligned. | ||
831 | * No need to check in that case | ||
832 | */ | ||
833 | if (*addr & ~PAGE_MASK) { | ||
834 | *addr &= PAGE_MASK; | ||
835 | /* | ||
836 | * People should not be passing in unaligned addresses: | ||
837 | */ | ||
838 | WARN_ON_ONCE(1); | ||
839 | } | ||
820 | } | 840 | } |
821 | 841 | ||
822 | /* Must avoid aliasing mappings in the highmem code */ | 842 | /* Must avoid aliasing mappings in the highmem code */ |
@@ -825,6 +845,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
825 | vm_unmap_aliases(); | 845 | vm_unmap_aliases(); |
826 | 846 | ||
827 | cpa.vaddr = addr; | 847 | cpa.vaddr = addr; |
848 | cpa.pages = pages; | ||
828 | cpa.numpages = numpages; | 849 | cpa.numpages = numpages; |
829 | cpa.mask_set = mask_set; | 850 | cpa.mask_set = mask_set; |
830 | cpa.mask_clr = mask_clr; | 851 | cpa.mask_clr = mask_clr; |
@@ -832,8 +853,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
832 | cpa.curpage = 0; | 853 | cpa.curpage = 0; |
833 | cpa.force_split = force_split; | 854 | cpa.force_split = force_split; |
834 | 855 | ||
835 | if (array) | 856 | if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) |
836 | cpa.flags |= CPA_ARRAY; | 857 | cpa.flags |= in_flag; |
837 | 858 | ||
838 | /* No alias checking for _NX bit modifications */ | 859 | /* No alias checking for _NX bit modifications */ |
839 | checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; | 860 | checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; |
@@ -859,9 +880,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
859 | * wbinvd): | 880 | * wbinvd):
860 | */ | 881 | */ |
861 | if (!ret && cpu_has_clflush) { | 882 | if (!ret && cpu_has_clflush) { |
862 | if (cpa.flags & CPA_ARRAY) | 883 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { |
863 | cpa_flush_array(addr, numpages, cache); | 884 | cpa_flush_array(addr, numpages, cache, |
864 | else | 885 | cpa.flags, pages); |
886 | } else | ||
865 | cpa_flush_range(*addr, numpages, cache); | 887 | cpa_flush_range(*addr, numpages, cache); |
866 | } else | 888 | } else |
867 | cpa_flush_all(cache); | 889 | cpa_flush_all(cache); |
@@ -874,14 +896,28 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages, | |||
874 | pgprot_t mask, int array) | 896 | pgprot_t mask, int array) |
875 | { | 897 | { |
876 | return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, | 898 | return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, |
877 | array); | 899 | (array ? CPA_ARRAY : 0), NULL); |
878 | } | 900 | } |
879 | 901 | ||
880 | static inline int change_page_attr_clear(unsigned long *addr, int numpages, | 902 | static inline int change_page_attr_clear(unsigned long *addr, int numpages, |
881 | pgprot_t mask, int array) | 903 | pgprot_t mask, int array) |
882 | { | 904 | { |
883 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, | 905 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, |
884 | array); | 906 | (array ? CPA_ARRAY : 0), NULL); |
907 | } | ||
908 | |||
909 | static inline int cpa_set_pages_array(struct page **pages, int numpages, | ||
910 | pgprot_t mask) | ||
911 | { | ||
912 | return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, | ||
913 | CPA_PAGES_ARRAY, pages); | ||
914 | } | ||
915 | |||
916 | static inline int cpa_clear_pages_array(struct page **pages, int numpages, | ||
917 | pgprot_t mask) | ||
918 | { | ||
919 | return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, | ||
920 | CPA_PAGES_ARRAY, pages); | ||
885 | } | 921 | } |
886 | 922 | ||
887 | int _set_memory_uc(unsigned long addr, int numpages) | 923 | int _set_memory_uc(unsigned long addr, int numpages) |
@@ -1029,7 +1065,7 @@ int set_memory_np(unsigned long addr, int numpages) | |||
1029 | int set_memory_4k(unsigned long addr, int numpages) | 1065 | int set_memory_4k(unsigned long addr, int numpages) |
1030 | { | 1066 | { |
1031 | return change_page_attr_set_clr(&addr, numpages, __pgprot(0), | 1067 | return change_page_attr_set_clr(&addr, numpages, __pgprot(0), |
1032 | __pgprot(0), 1, 0); | 1068 | __pgprot(0), 1, 0, NULL); |
1033 | } | 1069 | } |
1034 | 1070 | ||
1035 | int set_pages_uc(struct page *page, int numpages) | 1071 | int set_pages_uc(struct page *page, int numpages) |
@@ -1040,6 +1076,35 @@ int set_pages_uc(struct page *page, int numpages) | |||
1040 | } | 1076 | } |
1041 | EXPORT_SYMBOL(set_pages_uc); | 1077 | EXPORT_SYMBOL(set_pages_uc); |
1042 | 1078 | ||
1079 | int set_pages_array_uc(struct page **pages, int addrinarray) | ||
1080 | { | ||
1081 | unsigned long start; | ||
1082 | unsigned long end; | ||
1083 | int i; | ||
1084 | int free_idx; | ||
1085 | |||
1086 | for (i = 0; i < addrinarray; i++) { | ||
1087 | start = (unsigned long)page_address(pages[i]); | ||
1088 | end = start + PAGE_SIZE; | ||
1089 | if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) | ||
1090 | goto err_out; | ||
1091 | } | ||
1092 | |||
1093 | if (cpa_set_pages_array(pages, addrinarray, | ||
1094 | __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { | ||
1095 | return 0; /* Success */ | ||
1096 | } | ||
1097 | err_out: | ||
1098 | free_idx = i; | ||
1099 | for (i = 0; i < free_idx; i++) { | ||
1100 | start = (unsigned long)page_address(pages[i]); | ||
1101 | end = start + PAGE_SIZE; | ||
1102 | free_memtype(start, end); | ||
1103 | } | ||
1104 | return -EINVAL; | ||
1105 | } | ||
1106 | EXPORT_SYMBOL(set_pages_array_uc); | ||
1107 | |||
1043 | int set_pages_wb(struct page *page, int numpages) | 1108 | int set_pages_wb(struct page *page, int numpages) |
1044 | { | 1109 | { |
1045 | unsigned long addr = (unsigned long)page_address(page); | 1110 | unsigned long addr = (unsigned long)page_address(page); |
@@ -1048,6 +1113,26 @@ int set_pages_wb(struct page *page, int numpages) | |||
1048 | } | 1113 | } |
1049 | EXPORT_SYMBOL(set_pages_wb); | 1114 | EXPORT_SYMBOL(set_pages_wb); |
1050 | 1115 | ||
1116 | int set_pages_array_wb(struct page **pages, int addrinarray) | ||
1117 | { | ||
1118 | int retval; | ||
1119 | unsigned long start; | ||
1120 | unsigned long end; | ||
1121 | int i; | ||
1122 | |||
1123 | retval = cpa_clear_pages_array(pages, addrinarray, | ||
1124 | __pgprot(_PAGE_CACHE_MASK)); | ||
1125 | |||
1126 | for (i = 0; i < addrinarray; i++) { | ||
1127 | start = (unsigned long)page_address(pages[i]); | ||
1128 | end = start + PAGE_SIZE; | ||
1129 | free_memtype(start, end); | ||
1130 | } | ||
1131 | |||
1132 | return retval; | ||
1133 | } | ||
1134 | EXPORT_SYMBOL(set_pages_array_wb); | ||
1135 | |||
1051 | int set_pages_x(struct page *page, int numpages) | 1136 | int set_pages_x(struct page *page, int numpages) |
1052 | { | 1137 | { |
1053 | unsigned long addr = (unsigned long)page_address(page); | 1138 | unsigned long addr = (unsigned long)page_address(page); |
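set_pages_array_uc() and set_pages_array_wb() let a driver flip a whole array of pages to UC- and back with one CPA walk and a single flush, instead of one set_pages_uc()/set_pages_wb() call (and flush) per page; the UC variant also reserves the memtype for each page and unwinds those reservations if anything fails. Intended driver-side usage, with map_buffer_uncached()/unmap_buffer() and the page array being hypothetical:

    #include <linux/mm.h>
    #include <asm/cacheflush.h>

    /* Sketch: mark a buffer's pages uncached for device access, then
     * restore write-back (and release the memtype) on teardown. */
    static int map_buffer_uncached(struct page **pages, int npages)
    {
    	return set_pages_array_uc(pages, npages);	/* one flush for all */
    }

    static void unmap_buffer(struct page **pages, int npages)
    {
    	set_pages_array_wb(pages, npages);
    }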
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2ed37158012d..640339ee4fb2 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -677,10 +677,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
677 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); | 677 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); |
678 | 678 | ||
679 | /* | 679 | /* |
680 | * reserve_pfn_range() doesn't support RAM pages. | 680 | * reserve_pfn_range() doesn't support RAM pages. Maintain the current |
681 | * behavior with RAM pages by returning success. | ||
681 | */ | 682 | */ |
682 | if (is_ram != 0) | 683 | if (is_ram != 0) |
683 | return -EINVAL; | 684 | return 0; |
684 | 685 | ||
685 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); | 686 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); |
686 | if (ret) | 687 | if (ret) |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index f2e477c91c1b..46c8834aedc0 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -50,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) | |||
50 | } | 50 | } |
51 | pte = pte_offset_kernel(pmd, vaddr); | 51 | pte = pte_offset_kernel(pmd, vaddr); |
52 | if (pte_val(pteval)) | 52 | if (pte_val(pteval)) |
53 | set_pte_present(&init_mm, vaddr, pte, pteval); | 53 | set_pte_at(&init_mm, vaddr, pte, pteval); |
54 | else | 54 | else |
55 | pte_clear(&init_mm, vaddr, pte); | 55 | pte_clear(&init_mm, vaddr, pte); |
56 | 56 | ||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 574c8bc95ef0..c7d272b8574c 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -116,6 +116,36 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | |||
116 | reserve_early(phys, phys + length, "ACPI SLIT"); | 116 | reserve_early(phys, phys + length, "ACPI SLIT"); |
117 | } | 117 | } |
118 | 118 | ||
119 | /* Callback for Proximity Domain -> x2APIC mapping */ | ||
120 | void __init | ||
121 | acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | ||
122 | { | ||
123 | int pxm, node; | ||
124 | int apic_id; | ||
125 | |||
126 | if (srat_disabled()) | ||
127 | return; | ||
128 | if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) { | ||
129 | bad_srat(); | ||
130 | return; | ||
131 | } | ||
132 | if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) | ||
133 | return; | ||
134 | pxm = pa->proximity_domain; | ||
135 | node = setup_node(pxm); | ||
136 | if (node < 0) { | ||
137 | printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); | ||
138 | bad_srat(); | ||
139 | return; | ||
140 | } | ||
141 | |||
142 | apic_id = pa->apic_id; | ||
143 | apicid_to_node[apic_id] = node; | ||
144 | acpi_numa = 1; | ||
145 | printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", | ||
146 | pxm, apic_id, node); | ||
147 | } | ||
148 | |||
119 | /* Callback for Proximity Domain -> LAPIC mapping */ | 149 | /* Callback for Proximity Domain -> LAPIC mapping */ |
120 | void __init | 150 | void __init |
121 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | 151 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) |
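The x2APIC affinity callback mirrors the existing LAPIC one: it validates the SRAT entry, maps the proximity domain to a node, and records the result in apicid_to_node[] keyed by the 32-bit x2APIC id. A simplified sketch of how that table is consumed at CPU bring-up, modelled on init_cpu_to_node(); bind_cpu_to_srat_node() is hypothetical and the header locations are assumed:

    #include <linux/nodemask.h>
    #include <asm/numa_64.h>	/* apicid_to_node[], numa_set_node() (assumed) */
    #include <asm/topology.h>	/* NUMA_NO_NODE (assumed) */

    /* Sketch: bind a CPU to the node SRAT recorded for its APIC id. */
    static void __cpuinit bind_cpu_to_srat_node(int cpu, int apicid)
    {
    	int node = apicid_to_node[apicid];

    	if (node != NUMA_NO_NODE && node_online(node))
    		numa_set_node(cpu, node);
    }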
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a654d59e4483..821e97017e95 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -187,11 +187,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
187 | cpumask, cpumask_of(smp_processor_id())); | 187 | cpumask, cpumask_of(smp_processor_id())); |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * Make the above memory operations globally visible before | ||
191 | * sending the IPI. | ||
192 | */ | ||
193 | smp_mb(); | ||
194 | /* | ||
195 | * We have to send the IPI only to | 190 | * We have to send the IPI only to |
196 | * CPUs affected. | 191 | * CPUs affected. |
197 | */ | 192 | */ |