Diffstat (limited to 'arch/x86/mm')
 -rw-r--r--  arch/x86/mm/init.c          | 459
 -rw-r--r--  arch/x86/mm/init_32.c       | 106
 -rw-r--r--  arch/x86/mm/init_64.c       | 255
 -rw-r--r--  arch/x86/mm/mm_internal.h   |  19
 -rw-r--r--  arch/x86/mm/numa.c          |  32
 -rw-r--r--  arch/x86/mm/numa_32.c       | 161
 -rw-r--r--  arch/x86/mm/numa_64.c       |  13
 -rw-r--r--  arch/x86/mm/numa_internal.h |   6
 -rw-r--r--  arch/x86/mm/pageattr.c      |  66
 -rw-r--r--  arch/x86/mm/pat.c           |   4
 -rw-r--r--  arch/x86/mm/pgtable.c       |   7
 -rw-r--r--  arch/x86/mm/physaddr.c      |  60
 12 files changed, 634 insertions(+), 554 deletions(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d7aea41563b3..d41815265a0b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -17,86 +17,132 @@
17 | #include <asm/proto.h> | 17 | #include <asm/proto.h> |
18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | 18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ |
19 | 19 | ||
20 | unsigned long __initdata pgt_buf_start; | 20 | #include "mm_internal.h" |
21 | unsigned long __meminitdata pgt_buf_end; | ||
22 | unsigned long __meminitdata pgt_buf_top; | ||
23 | 21 | ||
24 | int after_bootmem; | 22 | static unsigned long __initdata pgt_buf_start; |
23 | static unsigned long __initdata pgt_buf_end; | ||
24 | static unsigned long __initdata pgt_buf_top; | ||
25 | 25 | ||
26 | int direct_gbpages | 26 | static unsigned long min_pfn_mapped; |
27 | #ifdef CONFIG_DIRECT_GBPAGES | ||
28 | = 1 | ||
29 | #endif | ||
30 | ; | ||
31 | 27 | ||
32 | struct map_range { | 28 | static bool __initdata can_use_brk_pgt = true; |
33 | unsigned long start; | ||
34 | unsigned long end; | ||
35 | unsigned page_size_mask; | ||
36 | }; | ||
37 | 29 | ||
38 | /* | 30 | /* |
39 | * First calculate space needed for kernel direct mapping page tables to cover | 31 | * Pages returned are already directly mapped. |
40 | * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB | 32 | * |
41 | * pages. Then find enough contiguous space for those page tables. | 33 | * Changing that is likely to break Xen, see commit: |
34 | * | ||
35 | * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve | ||
36 | * | ||
37 | * for detailed information. | ||
42 | */ | 38 | */ |
43 | static void __init find_early_table_space(struct map_range *mr, int nr_range) | 39 | __ref void *alloc_low_pages(unsigned int num) |
44 | { | 40 | { |
41 | unsigned long pfn; | ||
45 | int i; | 42 | int i; |
46 | unsigned long puds = 0, pmds = 0, ptes = 0, tables; | ||
47 | unsigned long start = 0, good_end; | ||
48 | phys_addr_t base; | ||
49 | 43 | ||
50 | for (i = 0; i < nr_range; i++) { | 44 | if (after_bootmem) { |
51 | unsigned long range, extra; | 45 | unsigned int order; |
52 | 46 | ||
53 | range = mr[i].end - mr[i].start; | 47 | order = get_order((unsigned long)num << PAGE_SHIFT); |
54 | puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; | 48 | return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | |
49 | __GFP_ZERO, order); | ||
50 | } | ||
55 | 51 | ||
56 | if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { | 52 | if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { |
57 | extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); | 53 | unsigned long ret; |
58 | pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; | 54 | if (min_pfn_mapped >= max_pfn_mapped) |
59 | } else { | 55 | panic("alloc_low_page: ran out of memory"); |
60 | pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; | 56 | ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, |
61 | } | 57 | max_pfn_mapped << PAGE_SHIFT, |
58 | PAGE_SIZE * num , PAGE_SIZE); | ||
59 | if (!ret) | ||
60 | panic("alloc_low_page: can not alloc memory"); | ||
61 | memblock_reserve(ret, PAGE_SIZE * num); | ||
62 | pfn = ret >> PAGE_SHIFT; | ||
63 | } else { | ||
64 | pfn = pgt_buf_end; | ||
65 | pgt_buf_end += num; | ||
66 | printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", | ||
67 | pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); | ||
68 | } | ||
62 | 69 | ||
63 | if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { | 70 | for (i = 0; i < num; i++) { |
64 | extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); | 71 | void *adr; |
65 | #ifdef CONFIG_X86_32 | 72 | |
66 | extra += PMD_SIZE; | 73 | adr = __va((pfn + i) << PAGE_SHIFT); |
67 | #endif | 74 | clear_page(adr); |
68 | ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
69 | } else { | ||
70 | ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
71 | } | ||
72 | } | 75 | } |
73 | 76 | ||
74 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | 77 | return __va(pfn << PAGE_SHIFT); |
75 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | 78 | } |
76 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
77 | 79 | ||
78 | #ifdef CONFIG_X86_32 | 80 | /* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ |
79 | /* for fixmap */ | 81 | #define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) |
80 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | 82 | RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); |
81 | #endif | 83 | void __init early_alloc_pgt_buf(void) |
82 | good_end = max_pfn_mapped << PAGE_SHIFT; | 84 | { |
85 | unsigned long tables = INIT_PGT_BUF_SIZE; | ||
86 | phys_addr_t base; | ||
83 | 87 | ||
84 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); | 88 | base = __pa(extend_brk(tables, PAGE_SIZE)); |
85 | if (!base) | ||
86 | panic("Cannot find space for the kernel page tables"); | ||
87 | 89 | ||
88 | pgt_buf_start = base >> PAGE_SHIFT; | 90 | pgt_buf_start = base >> PAGE_SHIFT; |
89 | pgt_buf_end = pgt_buf_start; | 91 | pgt_buf_end = pgt_buf_start; |
90 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); | 92 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); |
93 | } | ||
94 | |||
95 | int after_bootmem; | ||
96 | |||
97 | int direct_gbpages | ||
98 | #ifdef CONFIG_DIRECT_GBPAGES | ||
99 | = 1 | ||
100 | #endif | ||
101 | ; | ||
91 | 102 | ||
92 | printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", | 103 | static void __init init_gbpages(void) |
93 | mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, | 104 | { |
94 | (pgt_buf_top << PAGE_SHIFT) - 1); | 105 | #ifdef CONFIG_X86_64 |
106 | if (direct_gbpages && cpu_has_gbpages) | ||
107 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
108 | else | ||
109 | direct_gbpages = 0; | ||
110 | #endif | ||
95 | } | 111 | } |
96 | 112 | ||
97 | void __init native_pagetable_reserve(u64 start, u64 end) | 113 | struct map_range { |
114 | unsigned long start; | ||
115 | unsigned long end; | ||
116 | unsigned page_size_mask; | ||
117 | }; | ||
118 | |||
119 | static int page_size_mask; | ||
120 | |||
121 | static void __init probe_page_size_mask(void) | ||
98 | { | 122 | { |
99 | memblock_reserve(start, end - start); | 123 | init_gbpages(); |
124 | |||
125 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) | ||
126 | /* | ||
127 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
128 | * This will simplify cpa(), which otherwise needs to support splitting | ||
129 | * large pages into small in interrupt context, etc. | ||
130 | */ | ||
131 | if (direct_gbpages) | ||
132 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
133 | if (cpu_has_pse) | ||
134 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
135 | #endif | ||
136 | |||
137 | /* Enable PSE if available */ | ||
138 | if (cpu_has_pse) | ||
139 | set_in_cr4(X86_CR4_PSE); | ||
140 | |||
141 | /* Enable PGE if available */ | ||
142 | if (cpu_has_pge) { | ||
143 | set_in_cr4(X86_CR4_PGE); | ||
144 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
145 | } | ||
100 | } | 146 | } |
101 | 147 | ||
102 | #ifdef CONFIG_X86_32 | 148 | #ifdef CONFIG_X86_32 |
@@ -122,58 +168,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
122 | } | 168 | } |
123 | 169 | ||
124 | /* | 170 | /* |
125 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | 171 | * adjust the page_size_mask for small range to go with |
126 | * This runs before bootmem is initialized and gets pages directly from | 172 | * big page size instead small one if nearby are ram too. |
127 | * the physical memory. To access them they are temporarily mapped. | ||
128 | */ | 173 | */ |
129 | unsigned long __init_refok init_memory_mapping(unsigned long start, | 174 | static void __init_refok adjust_range_page_size_mask(struct map_range *mr, |
130 | unsigned long end) | 175 | int nr_range) |
131 | { | 176 | { |
132 | unsigned long page_size_mask = 0; | 177 | int i; |
133 | unsigned long start_pfn, end_pfn; | ||
134 | unsigned long ret = 0; | ||
135 | unsigned long pos; | ||
136 | |||
137 | struct map_range mr[NR_RANGE_MR]; | ||
138 | int nr_range, i; | ||
139 | int use_pse, use_gbpages; | ||
140 | 178 | ||
141 | printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", | 179 | for (i = 0; i < nr_range; i++) { |
142 | start, end - 1); | 180 | if ((page_size_mask & (1<<PG_LEVEL_2M)) && |
181 | !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) { | ||
182 | unsigned long start = round_down(mr[i].start, PMD_SIZE); | ||
183 | unsigned long end = round_up(mr[i].end, PMD_SIZE); | ||
143 | 184 | ||
144 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | 185 | #ifdef CONFIG_X86_32 |
145 | /* | 186 | if ((end >> PAGE_SHIFT) > max_low_pfn) |
146 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 187 | continue; |
147 | * This will simplify cpa(), which otherwise needs to support splitting | ||
148 | * large pages into small in interrupt context, etc. | ||
149 | */ | ||
150 | use_pse = use_gbpages = 0; | ||
151 | #else | ||
152 | use_pse = cpu_has_pse; | ||
153 | use_gbpages = direct_gbpages; | ||
154 | #endif | 188 | #endif |
155 | 189 | ||
156 | /* Enable PSE if available */ | 190 | if (memblock_is_region_memory(start, end - start)) |
157 | if (cpu_has_pse) | 191 | mr[i].page_size_mask |= 1<<PG_LEVEL_2M; |
158 | set_in_cr4(X86_CR4_PSE); | 192 | } |
193 | if ((page_size_mask & (1<<PG_LEVEL_1G)) && | ||
194 | !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) { | ||
195 | unsigned long start = round_down(mr[i].start, PUD_SIZE); | ||
196 | unsigned long end = round_up(mr[i].end, PUD_SIZE); | ||
159 | 197 | ||
160 | /* Enable PGE if available */ | 198 | if (memblock_is_region_memory(start, end - start)) |
161 | if (cpu_has_pge) { | 199 | mr[i].page_size_mask |= 1<<PG_LEVEL_1G; |
162 | set_in_cr4(X86_CR4_PGE); | 200 | } |
163 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
164 | } | 201 | } |
202 | } | ||
165 | 203 | ||
166 | if (use_gbpages) | 204 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, |
167 | page_size_mask |= 1 << PG_LEVEL_1G; | 205 | unsigned long start, |
168 | if (use_pse) | 206 | unsigned long end) |
169 | page_size_mask |= 1 << PG_LEVEL_2M; | 207 | { |
208 | unsigned long start_pfn, end_pfn, limit_pfn; | ||
209 | unsigned long pfn; | ||
210 | int i; | ||
170 | 211 | ||
171 | memset(mr, 0, sizeof(mr)); | 212 | limit_pfn = PFN_DOWN(end); |
172 | nr_range = 0; | ||
173 | 213 | ||
174 | /* head if not big page alignment ? */ | 214 | /* head if not big page alignment ? */ |
175 | start_pfn = start >> PAGE_SHIFT; | 215 | pfn = start_pfn = PFN_DOWN(start); |
176 | pos = start_pfn << PAGE_SHIFT; | ||
177 | #ifdef CONFIG_X86_32 | 216 | #ifdef CONFIG_X86_32 |
178 | /* | 217 | /* |
179 | * Don't use a large page for the first 2/4MB of memory | 218 | * Don't use a large page for the first 2/4MB of memory |
@@ -181,66 +220,60 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
181 | * and overlapping MTRRs into large pages can cause | 220 | * and overlapping MTRRs into large pages can cause |
182 | * slowdowns. | 221 | * slowdowns. |
183 | */ | 222 | */ |
184 | if (pos == 0) | 223 | if (pfn == 0) |
185 | end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); | 224 | end_pfn = PFN_DOWN(PMD_SIZE); |
186 | else | 225 | else |
187 | end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 226 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
188 | << (PMD_SHIFT - PAGE_SHIFT); | ||
189 | #else /* CONFIG_X86_64 */ | 227 | #else /* CONFIG_X86_64 */ |
190 | end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) | 228 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
191 | << (PMD_SHIFT - PAGE_SHIFT); | ||
192 | #endif | 229 | #endif |
193 | if (end_pfn > (end >> PAGE_SHIFT)) | 230 | if (end_pfn > limit_pfn) |
194 | end_pfn = end >> PAGE_SHIFT; | 231 | end_pfn = limit_pfn; |
195 | if (start_pfn < end_pfn) { | 232 | if (start_pfn < end_pfn) { |
196 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 233 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
197 | pos = end_pfn << PAGE_SHIFT; | 234 | pfn = end_pfn; |
198 | } | 235 | } |
199 | 236 | ||
200 | /* big page (2M) range */ | 237 | /* big page (2M) range */ |
201 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 238 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
202 | << (PMD_SHIFT - PAGE_SHIFT); | ||
203 | #ifdef CONFIG_X86_32 | 239 | #ifdef CONFIG_X86_32 |
204 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | 240 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
205 | #else /* CONFIG_X86_64 */ | 241 | #else /* CONFIG_X86_64 */ |
206 | end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | 242 | end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
207 | << (PUD_SHIFT - PAGE_SHIFT); | 243 | if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE))) |
208 | if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) | 244 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
209 | end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); | ||
210 | #endif | 245 | #endif |
211 | 246 | ||
212 | if (start_pfn < end_pfn) { | 247 | if (start_pfn < end_pfn) { |
213 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 248 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
214 | page_size_mask & (1<<PG_LEVEL_2M)); | 249 | page_size_mask & (1<<PG_LEVEL_2M)); |
215 | pos = end_pfn << PAGE_SHIFT; | 250 | pfn = end_pfn; |
216 | } | 251 | } |
217 | 252 | ||
218 | #ifdef CONFIG_X86_64 | 253 | #ifdef CONFIG_X86_64 |
219 | /* big page (1G) range */ | 254 | /* big page (1G) range */ |
220 | start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | 255 | start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
221 | << (PUD_SHIFT - PAGE_SHIFT); | 256 | end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE)); |
222 | end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
223 | if (start_pfn < end_pfn) { | 257 | if (start_pfn < end_pfn) { |
224 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 258 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
225 | page_size_mask & | 259 | page_size_mask & |
226 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | 260 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); |
227 | pos = end_pfn << PAGE_SHIFT; | 261 | pfn = end_pfn; |
228 | } | 262 | } |
229 | 263 | ||
230 | /* tail is not big page (1G) alignment */ | 264 | /* tail is not big page (1G) alignment */ |
231 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 265 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
232 | << (PMD_SHIFT - PAGE_SHIFT); | 266 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
233 | end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
234 | if (start_pfn < end_pfn) { | 267 | if (start_pfn < end_pfn) { |
235 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 268 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
236 | page_size_mask & (1<<PG_LEVEL_2M)); | 269 | page_size_mask & (1<<PG_LEVEL_2M)); |
237 | pos = end_pfn << PAGE_SHIFT; | 270 | pfn = end_pfn; |
238 | } | 271 | } |
239 | #endif | 272 | #endif |
240 | 273 | ||
241 | /* tail is not big page (2M) alignment */ | 274 | /* tail is not big page (2M) alignment */ |
242 | start_pfn = pos>>PAGE_SHIFT; | 275 | start_pfn = pfn; |
243 | end_pfn = end>>PAGE_SHIFT; | 276 | end_pfn = limit_pfn; |
244 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 277 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
245 | 278 | ||
246 | /* try to merge same page size and continuous */ | 279 | /* try to merge same page size and continuous */ |
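The split_mem_range() logic above carves a physical range into a 4k head, 2M- and 1G-aligned middle chunks, and 2M/4k tails, so as much of the range as possible is mapped with large pages. The standalone C sketch below is not part of the patch: it only models the 64-bit pfn arithmetic for a made-up example range and prints the resulting sub-ranges; the merge pass and the 32-bit head special case are left out.

/*
 * Toy model of split_mem_range() (64-bit flavour).  Illustrative only:
 * the input range is invented and neighbouring ranges are not merged.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PMD_PFNS   (1UL << 9)		/* pfns per 2 MiB page */
#define PUD_PFNS   (1UL << 18)		/* pfns per 1 GiB page */

static unsigned long rup(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

static unsigned long rdown(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);
}

static void save(unsigned long s, unsigned long e, const char *sz)
{
	if (s < e)
		printf(" [mem %#012lx-%#012lx] page %s\n",
		       s << PAGE_SHIFT, (e << PAGE_SHIFT) - 1, sz);
}

int main(void)
{
	unsigned long start = 0x100000UL;	/* 1 MiB */
	unsigned long end   = 0x87654000UL;	/* ~2.1 GiB, oddly aligned end */
	unsigned long pfn = start >> PAGE_SHIFT, limit = end >> PAGE_SHIFT;
	unsigned long s, e;

	/* head: 4k pages up to the first 2M boundary */
	e = rup(pfn, PMD_PFNS);
	if (e > limit)
		e = limit;
	save(pfn, e, "4k");
	if (pfn < e)
		pfn = e;

	/* 2M pages up to the first 1G boundary (capped at a 2M-aligned end) */
	s = rup(pfn, PMD_PFNS);
	e = rup(pfn, PUD_PFNS);
	if (e > rdown(limit, PMD_PFNS))
		e = rdown(limit, PMD_PFNS);
	save(s, e, "2M");
	if (s < e)
		pfn = e;

	/* 1G pages in the aligned middle */
	s = rup(pfn, PUD_PFNS);
	e = rdown(limit, PUD_PFNS);
	save(s, e, "1G");
	if (s < e)
		pfn = e;

	/* 2M tail below the last 1G boundary */
	s = rup(pfn, PMD_PFNS);
	e = rdown(limit, PMD_PFNS);
	save(s, e, "2M");
	if (s < e)
		pfn = e;

	/* 4k tail */
	save(pfn, limit, "4k");
	return 0;
}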
@@ -257,59 +290,169 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
257 | nr_range--; | 290 | nr_range--; |
258 | } | 291 | } |
259 | 292 | ||
293 | if (!after_bootmem) | ||
294 | adjust_range_page_size_mask(mr, nr_range); | ||
295 | |||
260 | for (i = 0; i < nr_range; i++) | 296 | for (i = 0; i < nr_range; i++) |
261 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 297 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
262 | mr[i].start, mr[i].end - 1, | 298 | mr[i].start, mr[i].end - 1, |
263 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | 299 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( |
264 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | 300 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); |
265 | 301 | ||
266 | /* | 302 | return nr_range; |
267 | * Find space for the kernel direct mapping tables. | 303 | } |
268 | * | 304 | |
269 | * Later we should allocate these tables in the local node of the | 305 | struct range pfn_mapped[E820_X_MAX]; |
270 | * memory mapped. Unfortunately this is done currently before the | 306 | int nr_pfn_mapped; |
271 | * nodes are discovered. | 307 | |
272 | */ | 308 | static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) |
273 | if (!after_bootmem) | 309 | { |
274 | find_early_table_space(mr, nr_range); | 310 | nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, |
311 | nr_pfn_mapped, start_pfn, end_pfn); | ||
312 | nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX); | ||
313 | |||
314 | max_pfn_mapped = max(max_pfn_mapped, end_pfn); | ||
315 | |||
316 | if (start_pfn < (1UL<<(32-PAGE_SHIFT))) | ||
317 | max_low_pfn_mapped = max(max_low_pfn_mapped, | ||
318 | min(end_pfn, 1UL<<(32-PAGE_SHIFT))); | ||
319 | } | ||
320 | |||
321 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) | ||
322 | { | ||
323 | int i; | ||
324 | |||
325 | for (i = 0; i < nr_pfn_mapped; i++) | ||
326 | if ((start_pfn >= pfn_mapped[i].start) && | ||
327 | (end_pfn <= pfn_mapped[i].end)) | ||
328 | return true; | ||
329 | |||
330 | return false; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | ||
335 | * This runs before bootmem is initialized and gets pages directly from | ||
336 | * the physical memory. To access them they are temporarily mapped. | ||
337 | */ | ||
338 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
339 | unsigned long end) | ||
340 | { | ||
341 | struct map_range mr[NR_RANGE_MR]; | ||
342 | unsigned long ret = 0; | ||
343 | int nr_range, i; | ||
344 | |||
345 | pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", | ||
346 | start, end - 1); | ||
347 | |||
348 | memset(mr, 0, sizeof(mr)); | ||
349 | nr_range = split_mem_range(mr, 0, start, end); | ||
275 | 350 | ||
276 | for (i = 0; i < nr_range; i++) | 351 | for (i = 0; i < nr_range; i++) |
277 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | 352 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, |
278 | mr[i].page_size_mask); | 353 | mr[i].page_size_mask); |
279 | 354 | ||
280 | #ifdef CONFIG_X86_32 | 355 | add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); |
281 | early_ioremap_page_table_range_init(); | ||
282 | 356 | ||
283 | load_cr3(swapper_pg_dir); | 357 | return ret >> PAGE_SHIFT; |
284 | #endif | 358 | } |
285 | 359 | ||
286 | __flush_tlb_all(); | 360 | /* |
361 | * would have hole in the middle or ends, and only ram parts will be mapped. | ||
362 | */ | ||
363 | static unsigned long __init init_range_memory_mapping( | ||
364 | unsigned long r_start, | ||
365 | unsigned long r_end) | ||
366 | { | ||
367 | unsigned long start_pfn, end_pfn; | ||
368 | unsigned long mapped_ram_size = 0; | ||
369 | int i; | ||
287 | 370 | ||
288 | /* | 371 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { |
289 | * Reserve the kernel pagetable pages we used (pgt_buf_start - | 372 | u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); |
290 | * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) | 373 | u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); |
291 | * so that they can be reused for other purposes. | 374 | if (start >= end) |
292 | * | 375 | continue; |
293 | * On native it just means calling memblock_reserve, on Xen it also | ||
294 | * means marking RW the pagetable pages that we allocated before | ||
295 | * but that haven't been used. | ||
296 | * | ||
297 | * In fact on xen we mark RO the whole range pgt_buf_start - | ||
298 | * pgt_buf_top, because we have to make sure that when | ||
299 | * init_memory_mapping reaches the pagetable pages area, it maps | ||
300 | * RO all the pagetable pages, including the ones that are beyond | ||
301 | * pgt_buf_end at that time. | ||
302 | */ | ||
303 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) | ||
304 | x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), | ||
305 | PFN_PHYS(pgt_buf_end)); | ||
306 | 376 | ||
307 | if (!after_bootmem) | 377 | /* |
308 | early_memtest(start, end); | 378 | * if it is overlapping with brk pgt, we need to |
379 | * alloc pgt buf from memblock instead. | ||
380 | */ | ||
381 | can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= | ||
382 | min(end, (u64)pgt_buf_top<<PAGE_SHIFT); | ||
383 | init_memory_mapping(start, end); | ||
384 | mapped_ram_size += end - start; | ||
385 | can_use_brk_pgt = true; | ||
386 | } | ||
309 | 387 | ||
310 | return ret >> PAGE_SHIFT; | 388 | return mapped_ram_size; |
311 | } | 389 | } |
312 | 390 | ||
391 | /* (PUD_SHIFT-PMD_SHIFT)/2 */ | ||
392 | #define STEP_SIZE_SHIFT 5 | ||
393 | void __init init_mem_mapping(void) | ||
394 | { | ||
395 | unsigned long end, real_end, start, last_start; | ||
396 | unsigned long step_size; | ||
397 | unsigned long addr; | ||
398 | unsigned long mapped_ram_size = 0; | ||
399 | unsigned long new_mapped_ram_size; | ||
400 | |||
401 | probe_page_size_mask(); | ||
402 | |||
403 | #ifdef CONFIG_X86_64 | ||
404 | end = max_pfn << PAGE_SHIFT; | ||
405 | #else | ||
406 | end = max_low_pfn << PAGE_SHIFT; | ||
407 | #endif | ||
408 | |||
409 | /* the ISA range is always mapped regardless of memory holes */ | ||
410 | init_memory_mapping(0, ISA_END_ADDRESS); | ||
411 | |||
412 | /* xen has big range in reserved near end of ram, skip it at first */ | ||
413 | addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, | ||
414 | PAGE_SIZE); | ||
415 | real_end = addr + PMD_SIZE; | ||
416 | |||
417 | /* step_size need to be small so pgt_buf from BRK could cover it */ | ||
418 | step_size = PMD_SIZE; | ||
419 | max_pfn_mapped = 0; /* will get exact value next */ | ||
420 | min_pfn_mapped = real_end >> PAGE_SHIFT; | ||
421 | last_start = start = real_end; | ||
422 | while (last_start > ISA_END_ADDRESS) { | ||
423 | if (last_start > step_size) { | ||
424 | start = round_down(last_start - 1, step_size); | ||
425 | if (start < ISA_END_ADDRESS) | ||
426 | start = ISA_END_ADDRESS; | ||
427 | } else | ||
428 | start = ISA_END_ADDRESS; | ||
429 | new_mapped_ram_size = init_range_memory_mapping(start, | ||
430 | last_start); | ||
431 | last_start = start; | ||
432 | min_pfn_mapped = last_start >> PAGE_SHIFT; | ||
433 | /* only increase step_size after big range get mapped */ | ||
434 | if (new_mapped_ram_size > mapped_ram_size) | ||
435 | step_size <<= STEP_SIZE_SHIFT; | ||
436 | mapped_ram_size += new_mapped_ram_size; | ||
437 | } | ||
438 | |||
439 | if (real_end < end) | ||
440 | init_range_memory_mapping(real_end, end); | ||
441 | |||
442 | #ifdef CONFIG_X86_64 | ||
443 | if (max_pfn > max_low_pfn) { | ||
444 | /* can we preseve max_low_pfn ?*/ | ||
445 | max_low_pfn = max_pfn; | ||
446 | } | ||
447 | #else | ||
448 | early_ioremap_page_table_range_init(); | ||
449 | #endif | ||
450 | |||
451 | load_cr3(swapper_pg_dir); | ||
452 | __flush_tlb_all(); | ||
453 | |||
454 | early_memtest(0, max_pfn_mapped << PAGE_SHIFT); | ||
455 | } | ||
313 | 456 | ||
314 | /* | 457 | /* |
315 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | 458 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address |
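Taken together, the init.c changes above replace the old "size all page tables up front" scheme with a top-down walk: init_mem_mapping() maps the region just below the previously mapped area in windows that grow by STEP_SIZE_SHIFT, so the page tables for each window can be allocated from memory that is already mapped (initially only the small BRK buffer reserved by early_alloc_pgt_buf()). The standalone sketch below is not kernel code; it only models the window arithmetic of that loop with made-up constants and prints the windows that would be handed to init_range_memory_mapping().

/*
 * Toy model of the top-down window loop in init_mem_mapping().
 * Constants are illustrative: 1 MiB "ISA end", 2 MiB initial step,
 * RAM pretended to end at 1 GiB.
 */
#include <stdio.h>

#define STEP_SIZE_SHIFT 5
#define ISA_END		0x100000UL
#define PMD_SIZE	0x200000UL

static unsigned long round_down_ul(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);
}

int main(void)
{
	unsigned long real_end = 1UL << 30;	/* pretend usable RAM ends here */
	unsigned long step_size = PMD_SIZE;
	unsigned long last_start = real_end, start;
	unsigned long mapped = 0, new_mapped;

	while (last_start > ISA_END) {
		if (last_start > step_size) {
			start = round_down_ul(last_start - 1, step_size);
			if (start < ISA_END)
				start = ISA_END;
		} else {
			start = ISA_END;
		}
		printf("map [%#010lx, %#010lx)\n", start, last_start);
		new_mapped = last_start - start;
		last_start = start;
		/* only widen the window after a big range has been mapped */
		if (new_mapped > mapped)
			step_size <<= STEP_SIZE_SHIFT;
		mapped += new_mapped;
	}
	return 0;
}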
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 745d66b843c8..b299724f6e34 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -53,25 +53,14 @@
53 | #include <asm/page_types.h> | 53 | #include <asm/page_types.h> |
54 | #include <asm/init.h> | 54 | #include <asm/init.h> |
55 | 55 | ||
56 | #include "mm_internal.h" | ||
57 | |||
56 | unsigned long highstart_pfn, highend_pfn; | 58 | unsigned long highstart_pfn, highend_pfn; |
57 | 59 | ||
58 | static noinline int do_test_wp_bit(void); | 60 | static noinline int do_test_wp_bit(void); |
59 | 61 | ||
60 | bool __read_mostly __vmalloc_start_set = false; | 62 | bool __read_mostly __vmalloc_start_set = false; |
61 | 63 | ||
62 | static __init void *alloc_low_page(void) | ||
63 | { | ||
64 | unsigned long pfn = pgt_buf_end++; | ||
65 | void *adr; | ||
66 | |||
67 | if (pfn >= pgt_buf_top) | ||
68 | panic("alloc_low_page: ran out of memory"); | ||
69 | |||
70 | adr = __va(pfn * PAGE_SIZE); | ||
71 | clear_page(adr); | ||
72 | return adr; | ||
73 | } | ||
74 | |||
75 | /* | 64 | /* |
76 | * Creates a middle page table and puts a pointer to it in the | 65 | * Creates a middle page table and puts a pointer to it in the |
77 | * given global directory entry. This only returns the gd entry | 66 | * given global directory entry. This only returns the gd entry |
@@ -84,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
84 | 73 | ||
85 | #ifdef CONFIG_X86_PAE | 74 | #ifdef CONFIG_X86_PAE |
86 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 75 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
87 | if (after_bootmem) | 76 | pmd_table = (pmd_t *)alloc_low_page(); |
88 | pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); | ||
89 | else | ||
90 | pmd_table = (pmd_t *)alloc_low_page(); | ||
91 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 77 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
92 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 78 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
93 | pud = pud_offset(pgd, 0); | 79 | pud = pud_offset(pgd, 0); |
@@ -109,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
109 | static pte_t * __init one_page_table_init(pmd_t *pmd) | 95 | static pte_t * __init one_page_table_init(pmd_t *pmd) |
110 | { | 96 | { |
111 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { | 97 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
112 | pte_t *page_table = NULL; | 98 | pte_t *page_table = (pte_t *)alloc_low_page(); |
113 | |||
114 | if (after_bootmem) { | ||
115 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | ||
116 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | ||
117 | #endif | ||
118 | if (!page_table) | ||
119 | page_table = | ||
120 | (pte_t *)alloc_bootmem_pages(PAGE_SIZE); | ||
121 | } else | ||
122 | page_table = (pte_t *)alloc_low_page(); | ||
123 | 99 | ||
124 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); | 100 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); |
125 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 101 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
@@ -146,8 +122,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr)
146 | return one_page_table_init(pmd) + pte_idx; | 122 | return one_page_table_init(pmd) + pte_idx; |
147 | } | 123 | } |
148 | 124 | ||
125 | static unsigned long __init | ||
126 | page_table_range_init_count(unsigned long start, unsigned long end) | ||
127 | { | ||
128 | unsigned long count = 0; | ||
129 | #ifdef CONFIG_HIGHMEM | ||
130 | int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; | ||
131 | int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; | ||
132 | int pgd_idx, pmd_idx; | ||
133 | unsigned long vaddr; | ||
134 | |||
135 | if (pmd_idx_kmap_begin == pmd_idx_kmap_end) | ||
136 | return 0; | ||
137 | |||
138 | vaddr = start; | ||
139 | pgd_idx = pgd_index(vaddr); | ||
140 | |||
141 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { | ||
142 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | ||
143 | pmd_idx++) { | ||
144 | if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && | ||
145 | (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) | ||
146 | count++; | ||
147 | vaddr += PMD_SIZE; | ||
148 | } | ||
149 | pmd_idx = 0; | ||
150 | } | ||
151 | #endif | ||
152 | return count; | ||
153 | } | ||
154 | |||
149 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | 155 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, |
150 | unsigned long vaddr, pte_t *lastpte) | 156 | unsigned long vaddr, pte_t *lastpte, |
157 | void **adr) | ||
151 | { | 158 | { |
152 | #ifdef CONFIG_HIGHMEM | 159 | #ifdef CONFIG_HIGHMEM |
153 | /* | 160 | /* |
@@ -161,16 +168,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
161 | 168 | ||
162 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end | 169 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end |
163 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | 170 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin |
164 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | 171 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) { |
165 | && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start | ||
166 | || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) { | ||
167 | pte_t *newpte; | 172 | pte_t *newpte; |
168 | int i; | 173 | int i; |
169 | 174 | ||
170 | BUG_ON(after_bootmem); | 175 | BUG_ON(after_bootmem); |
171 | newpte = alloc_low_page(); | 176 | newpte = *adr; |
172 | for (i = 0; i < PTRS_PER_PTE; i++) | 177 | for (i = 0; i < PTRS_PER_PTE; i++) |
173 | set_pte(newpte + i, pte[i]); | 178 | set_pte(newpte + i, pte[i]); |
179 | *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE); | ||
174 | 180 | ||
175 | paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); | 181 | paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); |
176 | set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); | 182 | set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); |
@@ -204,6 +210,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
204 | pgd_t *pgd; | 210 | pgd_t *pgd; |
205 | pmd_t *pmd; | 211 | pmd_t *pmd; |
206 | pte_t *pte = NULL; | 212 | pte_t *pte = NULL; |
213 | unsigned long count = page_table_range_init_count(start, end); | ||
214 | void *adr = NULL; | ||
215 | |||
216 | if (count) | ||
217 | adr = alloc_low_pages(count); | ||
207 | 218 | ||
208 | vaddr = start; | 219 | vaddr = start; |
209 | pgd_idx = pgd_index(vaddr); | 220 | pgd_idx = pgd_index(vaddr); |
@@ -216,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
216 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | 227 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); |
217 | pmd++, pmd_idx++) { | 228 | pmd++, pmd_idx++) { |
218 | pte = page_table_kmap_check(one_page_table_init(pmd), | 229 | pte = page_table_kmap_check(one_page_table_init(pmd), |
219 | pmd, vaddr, pte); | 230 | pmd, vaddr, pte, &adr); |
220 | 231 | ||
221 | vaddr += PMD_SIZE; | 232 | vaddr += PMD_SIZE; |
222 | } | 233 | } |
@@ -310,6 +321,7 @@ repeat:
310 | __pgprot(PTE_IDENT_ATTR | | 321 | __pgprot(PTE_IDENT_ATTR | |
311 | _PAGE_PSE); | 322 | _PAGE_PSE); |
312 | 323 | ||
324 | pfn &= PMD_MASK >> PAGE_SHIFT; | ||
313 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + | 325 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + |
314 | PAGE_OFFSET + PAGE_SIZE-1; | 326 | PAGE_OFFSET + PAGE_SIZE-1; |
315 | 327 | ||
@@ -455,9 +467,14 @@ void __init native_pagetable_init(void)
455 | 467 | ||
456 | /* | 468 | /* |
457 | * Remove any mappings which extend past the end of physical | 469 | * Remove any mappings which extend past the end of physical |
458 | * memory from the boot time page table: | 470 | * memory from the boot time page table. |
471 | * In virtual address space, we should have at least two pages | ||
472 | * from VMALLOC_END to pkmap or fixmap according to VMALLOC_END | ||
473 | * definition. And max_low_pfn is set to VMALLOC_END physical | ||
474 | * address. If initial memory mapping is doing right job, we | ||
475 | * should have pte used near max_low_pfn or one pmd is not present. | ||
459 | */ | 476 | */ |
460 | for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { | 477 | for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) { |
461 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); | 478 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); |
462 | pgd = base + pgd_index(va); | 479 | pgd = base + pgd_index(va); |
463 | if (!pgd_present(*pgd)) | 480 | if (!pgd_present(*pgd)) |
@@ -468,10 +485,19 @@ void __init native_pagetable_init(void)
468 | if (!pmd_present(*pmd)) | 485 | if (!pmd_present(*pmd)) |
469 | break; | 486 | break; |
470 | 487 | ||
488 | /* should not be large page here */ | ||
489 | if (pmd_large(*pmd)) { | ||
490 | pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n", | ||
491 | pfn, pmd, __pa(pmd)); | ||
492 | BUG_ON(1); | ||
493 | } | ||
494 | |||
471 | pte = pte_offset_kernel(pmd, va); | 495 | pte = pte_offset_kernel(pmd, va); |
472 | if (!pte_present(*pte)) | 496 | if (!pte_present(*pte)) |
473 | break; | 497 | break; |
474 | 498 | ||
499 | printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n", | ||
500 | pfn, pmd, __pa(pmd), pte, __pa(pte)); | ||
475 | pte_clear(NULL, va, pte); | 501 | pte_clear(NULL, va, pte); |
476 | } | 502 | } |
477 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); | 503 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); |
@@ -550,7 +576,7 @@ early_param("highmem", parse_highmem);
550 | * artificially via the highmem=x boot parameter then create | 576 | * artificially via the highmem=x boot parameter then create |
551 | * it: | 577 | * it: |
552 | */ | 578 | */ |
553 | void __init lowmem_pfn_init(void) | 579 | static void __init lowmem_pfn_init(void) |
554 | { | 580 | { |
555 | /* max_low_pfn is 0, we already have early_res support */ | 581 | /* max_low_pfn is 0, we already have early_res support */ |
556 | max_low_pfn = max_pfn; | 582 | max_low_pfn = max_pfn; |
@@ -586,7 +612,7 @@ void __init lowmem_pfn_init(void)
586 | * We have more RAM than fits into lowmem - we try to put it into | 612 | * We have more RAM than fits into lowmem - we try to put it into |
587 | * highmem, also taking the highmem=x boot parameter into account: | 613 | * highmem, also taking the highmem=x boot parameter into account: |
588 | */ | 614 | */ |
589 | void __init highmem_pfn_init(void) | 615 | static void __init highmem_pfn_init(void) |
590 | { | 616 | { |
591 | max_low_pfn = MAXMEM_PFN; | 617 | max_low_pfn = MAXMEM_PFN; |
592 | 618 | ||
@@ -669,8 +695,6 @@ void __init setup_bootmem_allocator(void)
669 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 695 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
670 | max_pfn_mapped<<PAGE_SHIFT); | 696 | max_pfn_mapped<<PAGE_SHIFT); |
671 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 697 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
672 | |||
673 | after_bootmem = 1; | ||
674 | } | 698 | } |
675 | 699 | ||
676 | /* | 700 | /* |
@@ -753,6 +777,8 @@ void __init mem_init(void)
753 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | 777 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) |
754 | reservedpages++; | 778 | reservedpages++; |
755 | 779 | ||
780 | after_bootmem = 1; | ||
781 | |||
756 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | 782 | codesize = (unsigned long) &_etext - (unsigned long) &_text; |
757 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 783 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
758 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 784 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
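The init_32.c side of the series removes the old bootmem/pgt_buf special cases: one_md_table_init() and one_page_table_init() now simply call alloc_low_page(), and page_table_range_init() pre-counts the PTE pages its HIGHMEM/kmap fixup will need, fetches them with a single alloc_low_pages(count) call, and lets page_table_kmap_check() consume them through the *adr cursor. The sketch below is not kernel code; it models that count-once, batch-allocate, consume-by-cursor pattern in plain userspace C with made-up sizes.

/*
 * Toy model of the batch allocation pattern used by page_table_range_init():
 * decide how many pages are needed, grab them in one contiguous, zeroed
 * allocation, then hand them out one at a time via a moving cursor.
 */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096

/* stand-in for alloc_low_pages(num): one zeroed, contiguous chunk */
static void *batch_alloc(unsigned int num)
{
	return calloc(num, PAGE_SIZE);
}

/* stand-in for the *adr cursor in page_table_kmap_check() */
static void *take_page(void **adr)
{
	void *page = *adr;

	*adr = (char *)*adr + PAGE_SIZE;
	return page;
}

int main(void)
{
	unsigned int count = 3;		/* pretend three PMDs need new PTE pages */
	void *buf = batch_alloc(count);
	void *adr = buf;
	unsigned int i;

	if (!buf)
		return 1;
	for (i = 0; i < count; i++)
		printf("pmd %u gets pte page %p\n", i, take_page(&adr));
	free(buf);
	return 0;
}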
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d6eeead43758..3eba7f429880 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,82 @@
54 | #include <asm/uv/uv.h> | 54 | #include <asm/uv/uv.h> |
55 | #include <asm/setup.h> | 55 | #include <asm/setup.h> |
56 | 56 | ||
57 | #include "mm_internal.h" | ||
58 | |||
59 | static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, | ||
60 | unsigned long addr, unsigned long end) | ||
61 | { | ||
62 | addr &= PMD_MASK; | ||
63 | for (; addr < end; addr += PMD_SIZE) { | ||
64 | pmd_t *pmd = pmd_page + pmd_index(addr); | ||
65 | |||
66 | if (!pmd_present(*pmd)) | ||
67 | set_pmd(pmd, __pmd(addr | pmd_flag)); | ||
68 | } | ||
69 | } | ||
70 | static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, | ||
71 | unsigned long addr, unsigned long end) | ||
72 | { | ||
73 | unsigned long next; | ||
74 | |||
75 | for (; addr < end; addr = next) { | ||
76 | pud_t *pud = pud_page + pud_index(addr); | ||
77 | pmd_t *pmd; | ||
78 | |||
79 | next = (addr & PUD_MASK) + PUD_SIZE; | ||
80 | if (next > end) | ||
81 | next = end; | ||
82 | |||
83 | if (pud_present(*pud)) { | ||
84 | pmd = pmd_offset(pud, 0); | ||
85 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
86 | continue; | ||
87 | } | ||
88 | pmd = (pmd_t *)info->alloc_pgt_page(info->context); | ||
89 | if (!pmd) | ||
90 | return -ENOMEM; | ||
91 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
92 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
93 | } | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | ||
99 | unsigned long addr, unsigned long end) | ||
100 | { | ||
101 | unsigned long next; | ||
102 | int result; | ||
103 | int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; | ||
104 | |||
105 | for (; addr < end; addr = next) { | ||
106 | pgd_t *pgd = pgd_page + pgd_index(addr) + off; | ||
107 | pud_t *pud; | ||
108 | |||
109 | next = (addr & PGDIR_MASK) + PGDIR_SIZE; | ||
110 | if (next > end) | ||
111 | next = end; | ||
112 | |||
113 | if (pgd_present(*pgd)) { | ||
114 | pud = pud_offset(pgd, 0); | ||
115 | result = ident_pud_init(info, pud, addr, next); | ||
116 | if (result) | ||
117 | return result; | ||
118 | continue; | ||
119 | } | ||
120 | |||
121 | pud = (pud_t *)info->alloc_pgt_page(info->context); | ||
122 | if (!pud) | ||
123 | return -ENOMEM; | ||
124 | result = ident_pud_init(info, pud, addr, next); | ||
125 | if (result) | ||
126 | return result; | ||
127 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
128 | } | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
57 | static int __init parse_direct_gbpages_off(char *arg) | 133 | static int __init parse_direct_gbpages_off(char *arg) |
58 | { | 134 | { |
59 | direct_gbpages = 0; | 135 | direct_gbpages = 0; |
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
302 | void __init cleanup_highmap(void) | 378 | void __init cleanup_highmap(void) |
303 | { | 379 | { |
304 | unsigned long vaddr = __START_KERNEL_map; | 380 | unsigned long vaddr = __START_KERNEL_map; |
305 | unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); | 381 | unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE; |
306 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; | 382 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; |
307 | pmd_t *pmd = level2_kernel_pgt; | 383 | pmd_t *pmd = level2_kernel_pgt; |
308 | 384 | ||
385 | /* | ||
386 | * Native path, max_pfn_mapped is not set yet. | ||
387 | * Xen has valid max_pfn_mapped set in | ||
388 | * arch/x86/xen/mmu.c:xen_setup_kernel_pagetable(). | ||
389 | */ | ||
390 | if (max_pfn_mapped) | ||
391 | vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); | ||
392 | |||
309 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { | 393 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { |
310 | if (pmd_none(*pmd)) | 394 | if (pmd_none(*pmd)) |
311 | continue; | 395 | continue; |
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void)
314 | } | 398 | } |
315 | } | 399 | } |
316 | 400 | ||
317 | static __ref void *alloc_low_page(unsigned long *phys) | ||
318 | { | ||
319 | unsigned long pfn = pgt_buf_end++; | ||
320 | void *adr; | ||
321 | |||
322 | if (after_bootmem) { | ||
323 | adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); | ||
324 | *phys = __pa(adr); | ||
325 | |||
326 | return adr; | ||
327 | } | ||
328 | |||
329 | if (pfn >= pgt_buf_top) | ||
330 | panic("alloc_low_page: ran out of memory"); | ||
331 | |||
332 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); | ||
333 | clear_page(adr); | ||
334 | *phys = pfn * PAGE_SIZE; | ||
335 | return adr; | ||
336 | } | ||
337 | |||
338 | static __ref void *map_low_page(void *virt) | ||
339 | { | ||
340 | void *adr; | ||
341 | unsigned long phys, left; | ||
342 | |||
343 | if (after_bootmem) | ||
344 | return virt; | ||
345 | |||
346 | phys = __pa(virt); | ||
347 | left = phys & (PAGE_SIZE - 1); | ||
348 | adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE); | ||
349 | adr = (void *)(((unsigned long)adr) | left); | ||
350 | |||
351 | return adr; | ||
352 | } | ||
353 | |||
354 | static __ref void unmap_low_page(void *adr) | ||
355 | { | ||
356 | if (after_bootmem) | ||
357 | return; | ||
358 | |||
359 | early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE); | ||
360 | } | ||
361 | |||
362 | static unsigned long __meminit | 401 | static unsigned long __meminit |
363 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, | 402 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, |
364 | pgprot_t prot) | 403 | pgprot_t prot) |
365 | { | 404 | { |
366 | unsigned pages = 0; | 405 | unsigned long pages = 0, next; |
367 | unsigned long last_map_addr = end; | 406 | unsigned long last_map_addr = end; |
368 | int i; | 407 | int i; |
369 | 408 | ||
370 | pte_t *pte = pte_page + pte_index(addr); | 409 | pte_t *pte = pte_page + pte_index(addr); |
371 | 410 | ||
372 | for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { | 411 | for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { |
373 | 412 | next = (addr & PAGE_MASK) + PAGE_SIZE; | |
374 | if (addr >= end) { | 413 | if (addr >= end) { |
375 | if (!after_bootmem) { | 414 | if (!after_bootmem && |
376 | for(; i < PTRS_PER_PTE; i++, pte++) | 415 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && |
377 | set_pte(pte, __pte(0)); | 416 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) |
378 | } | 417 | set_pte(pte, __pte(0)); |
379 | break; | 418 | continue; |
380 | } | 419 | } |
381 | 420 | ||
382 | /* | 421 | /* |
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
414 | int i = pmd_index(address); | 453 | int i = pmd_index(address); |
415 | 454 | ||
416 | for (; i < PTRS_PER_PMD; i++, address = next) { | 455 | for (; i < PTRS_PER_PMD; i++, address = next) { |
417 | unsigned long pte_phys; | ||
418 | pmd_t *pmd = pmd_page + pmd_index(address); | 456 | pmd_t *pmd = pmd_page + pmd_index(address); |
419 | pte_t *pte; | 457 | pte_t *pte; |
420 | pgprot_t new_prot = prot; | 458 | pgprot_t new_prot = prot; |
421 | 459 | ||
460 | next = (address & PMD_MASK) + PMD_SIZE; | ||
422 | if (address >= end) { | 461 | if (address >= end) { |
423 | if (!after_bootmem) { | 462 | if (!after_bootmem && |
424 | for (; i < PTRS_PER_PMD; i++, pmd++) | 463 | !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && |
425 | set_pmd(pmd, __pmd(0)); | 464 | !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) |
426 | } | 465 | set_pmd(pmd, __pmd(0)); |
427 | break; | 466 | continue; |
428 | } | 467 | } |
429 | 468 | ||
430 | next = (address & PMD_MASK) + PMD_SIZE; | ||
431 | |||
432 | if (pmd_val(*pmd)) { | 469 | if (pmd_val(*pmd)) { |
433 | if (!pmd_large(*pmd)) { | 470 | if (!pmd_large(*pmd)) { |
434 | spin_lock(&init_mm.page_table_lock); | 471 | spin_lock(&init_mm.page_table_lock); |
435 | pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); | 472 | pte = (pte_t *)pmd_page_vaddr(*pmd); |
436 | last_map_addr = phys_pte_init(pte, address, | 473 | last_map_addr = phys_pte_init(pte, address, |
437 | end, prot); | 474 | end, prot); |
438 | unmap_low_page(pte); | ||
439 | spin_unlock(&init_mm.page_table_lock); | 475 | spin_unlock(&init_mm.page_table_lock); |
440 | continue; | 476 | continue; |
441 | } | 477 | } |
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
464 | pages++; | 500 | pages++; |
465 | spin_lock(&init_mm.page_table_lock); | 501 | spin_lock(&init_mm.page_table_lock); |
466 | set_pte((pte_t *)pmd, | 502 | set_pte((pte_t *)pmd, |
467 | pfn_pte(address >> PAGE_SHIFT, | 503 | pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, |
468 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | 504 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); |
469 | spin_unlock(&init_mm.page_table_lock); | 505 | spin_unlock(&init_mm.page_table_lock); |
470 | last_map_addr = next; | 506 | last_map_addr = next; |
471 | continue; | 507 | continue; |
472 | } | 508 | } |
473 | 509 | ||
474 | pte = alloc_low_page(&pte_phys); | 510 | pte = alloc_low_page(); |
475 | last_map_addr = phys_pte_init(pte, address, end, new_prot); | 511 | last_map_addr = phys_pte_init(pte, address, end, new_prot); |
476 | unmap_low_page(pte); | ||
477 | 512 | ||
478 | spin_lock(&init_mm.page_table_lock); | 513 | spin_lock(&init_mm.page_table_lock); |
479 | pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); | 514 | pmd_populate_kernel(&init_mm, pmd, pte); |
480 | spin_unlock(&init_mm.page_table_lock); | 515 | spin_unlock(&init_mm.page_table_lock); |
481 | } | 516 | } |
482 | update_page_count(PG_LEVEL_2M, pages); | 517 | update_page_count(PG_LEVEL_2M, pages); |
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
492 | int i = pud_index(addr); | 527 | int i = pud_index(addr); |
493 | 528 | ||
494 | for (; i < PTRS_PER_PUD; i++, addr = next) { | 529 | for (; i < PTRS_PER_PUD; i++, addr = next) { |
495 | unsigned long pmd_phys; | ||
496 | pud_t *pud = pud_page + pud_index(addr); | 530 | pud_t *pud = pud_page + pud_index(addr); |
497 | pmd_t *pmd; | 531 | pmd_t *pmd; |
498 | pgprot_t prot = PAGE_KERNEL; | 532 | pgprot_t prot = PAGE_KERNEL; |
499 | 533 | ||
500 | if (addr >= end) | ||
501 | break; | ||
502 | |||
503 | next = (addr & PUD_MASK) + PUD_SIZE; | 534 | next = (addr & PUD_MASK) + PUD_SIZE; |
504 | 535 | if (addr >= end) { | |
505 | if (!after_bootmem && !e820_any_mapped(addr, next, 0)) { | 536 | if (!after_bootmem && |
506 | set_pud(pud, __pud(0)); | 537 | !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && |
538 | !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) | ||
539 | set_pud(pud, __pud(0)); | ||
507 | continue; | 540 | continue; |
508 | } | 541 | } |
509 | 542 | ||
510 | if (pud_val(*pud)) { | 543 | if (pud_val(*pud)) { |
511 | if (!pud_large(*pud)) { | 544 | if (!pud_large(*pud)) { |
512 | pmd = map_low_page(pmd_offset(pud, 0)); | 545 | pmd = pmd_offset(pud, 0); |
513 | last_map_addr = phys_pmd_init(pmd, addr, end, | 546 | last_map_addr = phys_pmd_init(pmd, addr, end, |
514 | page_size_mask, prot); | 547 | page_size_mask, prot); |
515 | unmap_low_page(pmd); | ||
516 | __flush_tlb_all(); | 548 | __flush_tlb_all(); |
517 | continue; | 549 | continue; |
518 | } | 550 | } |
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
541 | pages++; | 573 | pages++; |
542 | spin_lock(&init_mm.page_table_lock); | 574 | spin_lock(&init_mm.page_table_lock); |
543 | set_pte((pte_t *)pud, | 575 | set_pte((pte_t *)pud, |
544 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 576 | pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, |
577 | PAGE_KERNEL_LARGE)); | ||
545 | spin_unlock(&init_mm.page_table_lock); | 578 | spin_unlock(&init_mm.page_table_lock); |
546 | last_map_addr = next; | 579 | last_map_addr = next; |
547 | continue; | 580 | continue; |
548 | } | 581 | } |
549 | 582 | ||
550 | pmd = alloc_low_page(&pmd_phys); | 583 | pmd = alloc_low_page(); |
551 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, | 584 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, |
552 | prot); | 585 | prot); |
553 | unmap_low_page(pmd); | ||
554 | 586 | ||
555 | spin_lock(&init_mm.page_table_lock); | 587 | spin_lock(&init_mm.page_table_lock); |
556 | pud_populate(&init_mm, pud, __va(pmd_phys)); | 588 | pud_populate(&init_mm, pud, pmd); |
557 | spin_unlock(&init_mm.page_table_lock); | 589 | spin_unlock(&init_mm.page_table_lock); |
558 | } | 590 | } |
559 | __flush_tlb_all(); | 591 | __flush_tlb_all(); |
@@ -578,28 +610,23 @@ kernel_physical_mapping_init(unsigned long start,
578 | 610 | ||
579 | for (; start < end; start = next) { | 611 | for (; start < end; start = next) { |
580 | pgd_t *pgd = pgd_offset_k(start); | 612 | pgd_t *pgd = pgd_offset_k(start); |
581 | unsigned long pud_phys; | ||
582 | pud_t *pud; | 613 | pud_t *pud; |
583 | 614 | ||
584 | next = (start + PGDIR_SIZE) & PGDIR_MASK; | 615 | next = (start & PGDIR_MASK) + PGDIR_SIZE; |
585 | if (next > end) | ||
586 | next = end; | ||
587 | 616 | ||
588 | if (pgd_val(*pgd)) { | 617 | if (pgd_val(*pgd)) { |
589 | pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); | 618 | pud = (pud_t *)pgd_page_vaddr(*pgd); |
590 | last_map_addr = phys_pud_init(pud, __pa(start), | 619 | last_map_addr = phys_pud_init(pud, __pa(start), |
591 | __pa(end), page_size_mask); | 620 | __pa(end), page_size_mask); |
592 | unmap_low_page(pud); | ||
593 | continue; | 621 | continue; |
594 | } | 622 | } |
595 | 623 | ||
596 | pud = alloc_low_page(&pud_phys); | 624 | pud = alloc_low_page(); |
597 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), | 625 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), |
598 | page_size_mask); | 626 | page_size_mask); |
599 | unmap_low_page(pud); | ||
600 | 627 | ||
601 | spin_lock(&init_mm.page_table_lock); | 628 | spin_lock(&init_mm.page_table_lock); |
602 | pgd_populate(&init_mm, pgd, __va(pud_phys)); | 629 | pgd_populate(&init_mm, pgd, pud); |
603 | spin_unlock(&init_mm.page_table_lock); | 630 | spin_unlock(&init_mm.page_table_lock); |
604 | pgd_changed = true; | 631 | pgd_changed = true; |
605 | } | 632 | } |
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
664 | { | 691 | { |
665 | struct pglist_data *pgdat = NODE_DATA(nid); | 692 | struct pglist_data *pgdat = NODE_DATA(nid); |
666 | struct zone *zone = pgdat->node_zones + ZONE_NORMAL; | 693 | struct zone *zone = pgdat->node_zones + ZONE_NORMAL; |
667 | unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT; | 694 | unsigned long start_pfn = start >> PAGE_SHIFT; |
668 | unsigned long nr_pages = size >> PAGE_SHIFT; | 695 | unsigned long nr_pages = size >> PAGE_SHIFT; |
669 | int ret; | 696 | int ret; |
670 | 697 | ||
671 | last_mapped_pfn = init_memory_mapping(start, start + size); | 698 | init_memory_mapping(start, start + size); |
672 | if (last_mapped_pfn > max_pfn_mapped) | ||
673 | max_pfn_mapped = last_mapped_pfn; | ||
674 | 699 | ||
675 | ret = __add_pages(nid, zone, start_pfn, nr_pages); | 700 | ret = __add_pages(nid, zone, start_pfn, nr_pages); |
676 | WARN_ON_ONCE(ret); | 701 | WARN_ON_ONCE(ret); |
@@ -686,6 +711,16 @@ EXPORT_SYMBOL_GPL(arch_add_memory);
686 | 711 | ||
687 | static struct kcore_list kcore_vsyscall; | 712 | static struct kcore_list kcore_vsyscall; |
688 | 713 | ||
714 | static void __init register_page_bootmem_info(void) | ||
715 | { | ||
716 | #ifdef CONFIG_NUMA | ||
717 | int i; | ||
718 | |||
719 | for_each_online_node(i) | ||
720 | register_page_bootmem_info_node(NODE_DATA(i)); | ||
721 | #endif | ||
722 | } | ||
723 | |||
689 | void __init mem_init(void) | 724 | void __init mem_init(void) |
690 | { | 725 | { |
691 | long codesize, reservedpages, datasize, initsize; | 726 | long codesize, reservedpages, datasize, initsize; |
@@ -698,11 +733,8 @@ void __init mem_init(void)
698 | reservedpages = 0; | 733 | reservedpages = 0; |
699 | 734 | ||
700 | /* this will put all low memory onto the freelists */ | 735 | /* this will put all low memory onto the freelists */ |
701 | #ifdef CONFIG_NUMA | 736 | register_page_bootmem_info(); |
702 | totalram_pages = numa_free_all_bootmem(); | ||
703 | #else | ||
704 | totalram_pages = free_all_bootmem(); | 737 | totalram_pages = free_all_bootmem(); |
705 | #endif | ||
706 | 738 | ||
707 | absent_pages = absent_pages_in_range(0, max_pfn); | 739 | absent_pages = absent_pages_in_range(0, max_pfn); |
708 | reservedpages = max_pfn - totalram_pages - absent_pages; | 740 | reservedpages = max_pfn - totalram_pages - absent_pages; |
@@ -772,12 +804,11 @@ void set_kernel_text_ro(void)
772 | void mark_rodata_ro(void) | 804 | void mark_rodata_ro(void) |
773 | { | 805 | { |
774 | unsigned long start = PFN_ALIGN(_text); | 806 | unsigned long start = PFN_ALIGN(_text); |
775 | unsigned long rodata_start = | 807 | unsigned long rodata_start = PFN_ALIGN(__start_rodata); |
776 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | ||
777 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | 808 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; |
778 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | 809 | unsigned long text_end = PFN_ALIGN(&__stop___ex_table); |
779 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | 810 | unsigned long rodata_end = PFN_ALIGN(&__end_rodata); |
780 | unsigned long data_start = (unsigned long) &_sdata; | 811 | unsigned long all_end = PFN_ALIGN(&_end); |
781 | 812 | ||
782 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 813 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
783 | (end - start) >> 10); | 814 | (end - start) >> 10); |
@@ -786,10 +817,10 @@ void mark_rodata_ro(void)
786 | kernel_set_to_readonly = 1; | 817 | kernel_set_to_readonly = 1; |
787 | 818 | ||
788 | /* | 819 | /* |
789 | * The rodata section (but not the kernel text!) should also be | 820 | * The rodata/data/bss/brk section (but not the kernel text!) |
790 | * not-executable. | 821 | * should also be not-executable. |
791 | */ | 822 | */ |
792 | set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); | 823 | set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); |
793 | 824 | ||
794 | rodata_test(); | 825 | rodata_test(); |
795 | 826 | ||
@@ -802,12 +833,12 @@ void mark_rodata_ro(void)
802 | #endif | 833 | #endif |
803 | 834 | ||
804 | free_init_pages("unused kernel memory", | 835 | free_init_pages("unused kernel memory", |
805 | (unsigned long) page_address(virt_to_page(text_end)), | 836 | (unsigned long) __va(__pa_symbol(text_end)), |
806 | (unsigned long) | 837 | (unsigned long) __va(__pa_symbol(rodata_start))); |
807 | page_address(virt_to_page(rodata_start))); | 838 | |
808 | free_init_pages("unused kernel memory", | 839 | free_init_pages("unused kernel memory", |
809 | (unsigned long) page_address(virt_to_page(rodata_end)), | 840 | (unsigned long) __va(__pa_symbol(rodata_end)), |
810 | (unsigned long) page_address(virt_to_page(data_start))); | 841 | (unsigned long) __va(__pa_symbol(_sdata))); |
811 | } | 842 | } |
812 | 843 | ||
813 | #endif | 844 | #endif |
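Besides switching to the new allocator, init_64.c gains kernel_ident_mapping_init(), a generic identity-mapping builder: whenever a lower-level table is missing, it is obtained through the caller-supplied info->alloc_pgt_page() callback of struct x86_mapping_info, so the same walk can be reused with different backing allocators. The standalone sketch below is not kernel code; it models one pud-to-pmd step of that walk with toy 16-entry tables and a callback allocator (pointers are stored directly in the entries, which is only valid for this illustration).

/*
 * Toy model of the callback-driven table walk in kernel_ident_mapping_init():
 * a missing lower-level table is allocated via info->alloc_pgt_page(), then
 * identity "large page" entries are written into it.
 */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES   16			/* entries per toy table level */
#define PAGE_SIZE (1UL << 12)
#define PMD_SIZE  (PAGE_SIZE * ENTRIES)	/* one "large page" entry */
#define PUD_SIZE  (PMD_SIZE * ENTRIES)

struct mapping_info {
	void *(*alloc_pgt_page)(void *context);	/* like x86_mapping_info */
	void *context;
};

static void *alloc_table(void *context)
{
	(void)context;
	return calloc(ENTRIES, sizeof(unsigned long));
}

static int ident_pud_init(struct mapping_info *info, unsigned long *pud,
			  unsigned long addr, unsigned long end)
{
	for (; addr < end; addr += PMD_SIZE) {
		unsigned long idx = (addr / PUD_SIZE) % ENTRIES;
		unsigned long *pmd;

		if (!pud[idx]) {	/* next level missing: ask the callback */
			pmd = info->alloc_pgt_page(info->context);
			if (!pmd)
				return -1;
			pud[idx] = (unsigned long)pmd;
		}
		pmd = (unsigned long *)pud[idx];
		pmd[(addr / PMD_SIZE) % ENTRIES] = addr | 1;	/* present, identity */
	}
	return 0;
}

int main(void)
{
	struct mapping_info info = { .alloc_pgt_page = alloc_table };
	unsigned long *pud = alloc_table(NULL);

	if (!pud || ident_pud_init(&info, pud, 0, 4 * PMD_SIZE))
		return 1;
	printf("identity-mapped [0x0, %#lx) in toy tables\n", 4 * PMD_SIZE);
	return 0;
}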
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
new file mode 100644
index 000000000000..6b563a118891
--- /dev/null
+++ b/arch/x86/mm/mm_internal.h
@@ -0,0 +1,19 @@
1 | #ifndef __X86_MM_INTERNAL_H | ||
2 | #define __X86_MM_INTERNAL_H | ||
3 | |||
4 | void *alloc_low_pages(unsigned int num); | ||
5 | static inline void *alloc_low_page(void) | ||
6 | { | ||
7 | return alloc_low_pages(1); | ||
8 | } | ||
9 | |||
10 | void early_ioremap_page_table_range_init(void); | ||
11 | |||
12 | unsigned long kernel_physical_mapping_init(unsigned long start, | ||
13 | unsigned long end, | ||
14 | unsigned long page_size_mask); | ||
15 | void zone_sizes_init(void); | ||
16 | |||
17 | extern int after_bootmem; | ||
18 | |||
19 | #endif /* __X86_MM_INTERNAL_H */ | ||
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2d125be1bae9..8504f3698753 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -193,7 +193,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) | |||
193 | static void __init setup_node_data(int nid, u64 start, u64 end) | 193 | static void __init setup_node_data(int nid, u64 start, u64 end) |
194 | { | 194 | { |
195 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 195 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
196 | bool remapped = false; | ||
197 | u64 nd_pa; | 196 | u64 nd_pa; |
198 | void *nd; | 197 | void *nd; |
199 | int tnid; | 198 | int tnid; |
@@ -205,37 +204,28 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
205 | if (end && (end - start) < NODE_MIN_SIZE) | 204 | if (end && (end - start) < NODE_MIN_SIZE) |
206 | return; | 205 | return; |
207 | 206 | ||
208 | /* initialize remap allocator before aligning to ZONE_ALIGN */ | ||
209 | init_alloc_remap(nid, start, end); | ||
210 | |||
211 | start = roundup(start, ZONE_ALIGN); | 207 | start = roundup(start, ZONE_ALIGN); |
212 | 208 | ||
213 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", | 209 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", |
214 | nid, start, end - 1); | 210 | nid, start, end - 1); |
215 | 211 | ||
216 | /* | 212 | /* |
217 | * Allocate node data. Try remap allocator first, node-local | 213 | * Allocate node data. Try node-local memory and then any node. |
218 | * memory and then any node. Never allocate in DMA zone. | 214 | * Never allocate in DMA zone. |
219 | */ | 215 | */ |
220 | nd = alloc_remap(nid, nd_size); | 216 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); |
221 | if (nd) { | 217 | if (!nd_pa) { |
222 | nd_pa = __pa(nd); | 218 | pr_err("Cannot find %zu bytes in node %d\n", |
223 | remapped = true; | 219 | nd_size, nid); |
224 | } else { | 220 | return; |
225 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); | ||
226 | if (!nd_pa) { | ||
227 | pr_err("Cannot find %zu bytes in node %d\n", | ||
228 | nd_size, nid); | ||
229 | return; | ||
230 | } | ||
231 | nd = __va(nd_pa); | ||
232 | } | 221 | } |
222 | nd = __va(nd_pa); | ||
233 | 223 | ||
234 | /* report and initialize */ | 224 | /* report and initialize */ |
235 | printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", | 225 | printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", |
236 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); | 226 | nd_pa, nd_pa + nd_size - 1); |
237 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); | 227 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); |
238 | if (!remapped && tnid != nid) | 228 | if (tnid != nid) |
239 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); | 229 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); |
240 | 230 | ||
241 | node_data[nid] = nd; | 231 | node_data[nid] = nd; |
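For reference, the nd_size passed to memblock_alloc_nid() above is just sizeof(pg_data_t) rounded up to whole pages. The toy program below (with a padded struct standing in for pg_data_t, whose real size varies by configuration) shows that arithmetic in isolation.

#include <stdio.h>

#define PAGE_SIZE 4096UL
/* roundup(): round x up to the next multiple of y, as used for nd_size */
#define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

struct toy_pgdat { char pad[6000]; };	/* stand-in for pg_data_t */

int main(void)
{
	unsigned long nd_size = roundup(sizeof(struct toy_pgdat), PAGE_SIZE);

	printf("pg_data_t stand-in: %zu bytes -> nd_size: %lu bytes (%lu pages)\n",
	       sizeof(struct toy_pgdat), nd_size, nd_size / PAGE_SIZE);
	return 0;
}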
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 534255a36b6b..73a6d7395bd3 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | |||
73 | 73 | ||
74 | extern unsigned long highend_pfn, highstart_pfn; | 74 | extern unsigned long highend_pfn, highstart_pfn; |
75 | 75 | ||
76 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) | ||
77 | |||
78 | static void *node_remap_start_vaddr[MAX_NUMNODES]; | ||
79 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | ||
80 | |||
81 | /* | ||
82 | * Remap memory allocator | ||
83 | */ | ||
84 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | ||
85 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | ||
86 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; | ||
87 | |||
88 | /** | ||
89 | * alloc_remap - Allocate remapped memory | ||
90 | * @nid: NUMA node to allocate memory from | ||
91 | * @size: The size of allocation | ||
92 | * | ||
93 | * Allocate @size bytes from the remap area of NUMA node @nid. The | ||
94 | * size of the remap area is predetermined by init_alloc_remap() and | ||
95 | * only the callers considered there should call this function. For | ||
96 | * more info, please read the comment on top of init_alloc_remap(). | ||
97 | * | ||
98 | * The caller must be ready to handle allocation failure from this | ||
99 | * function and fall back to regular memory allocator in such cases. | ||
100 | * | ||
101 | * CONTEXT: | ||
102 | * Single CPU early boot context. | ||
103 | * | ||
104 | * RETURNS: | ||
105 | * Pointer to the allocated memory on success, %NULL on failure. | ||
106 | */ | ||
107 | void *alloc_remap(int nid, unsigned long size) | ||
108 | { | ||
109 | void *allocation = node_remap_alloc_vaddr[nid]; | ||
110 | |||
111 | size = ALIGN(size, L1_CACHE_BYTES); | ||
112 | |||
113 | if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) | ||
114 | return NULL; | ||
115 | |||
116 | node_remap_alloc_vaddr[nid] += size; | ||
117 | memset(allocation, 0, size); | ||
118 | |||
119 | return allocation; | ||
120 | } | ||
121 | |||
122 | #ifdef CONFIG_HIBERNATION | ||
123 | /** | ||
124 | * resume_map_numa_kva - add KVA mapping to the temporary page tables created | ||
125 | * during resume from hibernation | ||
126 | * @pgd_base - temporary resume page directory | ||
127 | */ | ||
128 | void resume_map_numa_kva(pgd_t *pgd_base) | ||
129 | { | ||
130 | int node; | ||
131 | |||
132 | for_each_online_node(node) { | ||
133 | unsigned long start_va, start_pfn, nr_pages, pfn; | ||
134 | |||
135 | start_va = (unsigned long)node_remap_start_vaddr[node]; | ||
136 | start_pfn = node_remap_start_pfn[node]; | ||
137 | nr_pages = (node_remap_end_vaddr[node] - | ||
138 | node_remap_start_vaddr[node]) >> PAGE_SHIFT; | ||
139 | |||
140 | printk(KERN_DEBUG "%s: node %d\n", __func__, node); | ||
141 | |||
142 | for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { | ||
143 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); | ||
144 | pgd_t *pgd = pgd_base + pgd_index(vaddr); | ||
145 | pud_t *pud = pud_offset(pgd, vaddr); | ||
146 | pmd_t *pmd = pmd_offset(pud, vaddr); | ||
147 | |||
148 | set_pmd(pmd, pfn_pmd(start_pfn + pfn, | ||
149 | PAGE_KERNEL_LARGE_EXEC)); | ||
150 | |||
151 | printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", | ||
152 | __func__, vaddr, start_pfn + pfn); | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | #endif | ||
157 | |||
158 | /** | ||
159 | * init_alloc_remap - Initialize remap allocator for a NUMA node | ||
160 | * @nid: NUMA node to initialize remap allocator for | ||

161 | * | ||
162 | * NUMA nodes may end up without any lowmem. As allocating pgdat and | ||
163 | * memmap on a different node with lowmem is inefficient, a special | ||
164 | * remap allocator is implemented which can be used by alloc_remap(). | ||
165 | * | ||
166 | * For each node, the amount of memory which will be necessary for | ||
167 | * pgdat and memmap is calculated and two memory areas of the size are | ||
168 | * allocated - one in the node and the other in lowmem; then, the area | ||
169 | * in the node is remapped to the lowmem area. | ||
170 | * | ||
171 | * As pgdat and memmap must be allocated in lowmem anyway, this | ||
172 | * doesn't waste lowmem address space; however, the actual lowmem | ||
173 | * which gets remapped over is wasted. The amount shouldn't be | ||
174 | * problematic on machines where this feature will be used. | ||
175 | * | ||
176 | * Initialization failure isn't fatal. alloc_remap() is used | ||
177 | * opportunistically and the callers will fall back to other memory | ||
178 | * allocation mechanisms on failure. | ||
179 | */ | ||
180 | void __init init_alloc_remap(int nid, u64 start, u64 end) | ||
181 | { | ||
182 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
183 | unsigned long end_pfn = end >> PAGE_SHIFT; | ||
184 | unsigned long size, pfn; | ||
185 | u64 node_pa, remap_pa; | ||
186 | void *remap_va; | ||
187 | |||
188 | /* | ||
189 | * The acpi/srat node info can show hot-add memory zones where | ||
190 | * memory could be added but not currently present. | ||
191 | */ | ||
192 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", | ||
193 | nid, start_pfn, end_pfn); | ||
194 | |||
195 | /* calculate the necessary space aligned to large page size */ | ||
196 | size = node_memmap_size_bytes(nid, start_pfn, end_pfn); | ||
197 | size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); | ||
198 | size = ALIGN(size, LARGE_PAGE_BYTES); | ||
199 | |||
200 | /* allocate node memory and the lowmem remap area */ | ||
201 | node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); | ||
202 | if (!node_pa) { | ||
203 | pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", | ||
204 | size, nid); | ||
205 | return; | ||
206 | } | ||
207 | memblock_reserve(node_pa, size); | ||
208 | |||
209 | remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, | ||
210 | max_low_pfn << PAGE_SHIFT, | ||
211 | size, LARGE_PAGE_BYTES); | ||
212 | if (!remap_pa) { | ||
213 | pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", | ||
214 | size, nid); | ||
215 | memblock_free(node_pa, size); | ||
216 | return; | ||
217 | } | ||
218 | memblock_reserve(remap_pa, size); | ||
219 | remap_va = phys_to_virt(remap_pa); | ||
220 | |||
221 | /* perform actual remap */ | ||
222 | for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) | ||
223 | set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), | ||
224 | (node_pa >> PAGE_SHIFT) + pfn, | ||
225 | PAGE_KERNEL_LARGE); | ||
226 | |||
227 | /* initialize remap allocator parameters */ | ||
228 | node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; | ||
229 | node_remap_start_vaddr[nid] = remap_va; | ||
230 | node_remap_end_vaddr[nid] = remap_va + size; | ||
231 | node_remap_alloc_vaddr[nid] = remap_va; | ||
232 | |||
233 | printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", | ||
234 | nid, node_pa, node_pa + size, remap_va, remap_va + size); | ||
235 | } | ||
236 | |||
237 | void __init initmem_init(void) | 76 | void __init initmem_init(void) |
238 | { | 77 | { |
239 | x86_numa_init(); | 78 | x86_numa_init(); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 92e27119ee1a..9405ffc91502 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -10,16 +10,3 @@ void __init initmem_init(void) | |||
10 | { | 10 | { |
11 | x86_numa_init(); | 11 | x86_numa_init(); |
12 | } | 12 | } |
13 | |||
14 | unsigned long __init numa_free_all_bootmem(void) | ||
15 | { | ||
16 | unsigned long pages = 0; | ||
17 | int i; | ||
18 | |||
19 | for_each_online_node(i) | ||
20 | pages += free_all_bootmem_node(NODE_DATA(i)); | ||
21 | |||
22 | pages += free_low_memory_core_early(MAX_NUMNODES); | ||
23 | |||
24 | return pages; | ||
25 | } | ||
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h index 7178c3afe05e..ad86ec91e640 100644 --- a/arch/x86/mm/numa_internal.h +++ b/arch/x86/mm/numa_internal.h | |||
@@ -21,12 +21,6 @@ void __init numa_reset_distance(void); | |||
21 | 21 | ||
22 | void __init x86_numa_init(void); | 22 | void __init x86_numa_init(void); |
23 | 23 | ||
24 | #ifdef CONFIG_X86_64 | ||
25 | static inline void init_alloc_remap(int nid, u64 start, u64 end) { } | ||
26 | #else | ||
27 | void __init init_alloc_remap(int nid, u64 start, u64 end); | ||
28 | #endif | ||
29 | |||
30 | #ifdef CONFIG_NUMA_EMU | 24 | #ifdef CONFIG_NUMA_EMU |
31 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, | 25 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, |
32 | int numa_dist_cnt); | 26 | int numa_dist_cnt); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a718e0d23503..a1b1c88f9caf 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { } | |||
94 | 94 | ||
95 | static inline unsigned long highmap_start_pfn(void) | 95 | static inline unsigned long highmap_start_pfn(void) |
96 | { | 96 | { |
97 | return __pa(_text) >> PAGE_SHIFT; | 97 | return __pa_symbol(_text) >> PAGE_SHIFT; |
98 | } | 98 | } |
99 | 99 | ||
100 | static inline unsigned long highmap_end_pfn(void) | 100 | static inline unsigned long highmap_end_pfn(void) |
101 | { | 101 | { |
102 | return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; | 102 | return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; |
103 | } | 103 | } |
104 | 104 | ||
105 | #endif | 105 | #endif |
@@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
276 | * The .rodata section needs to be read-only. Using the pfn | 276 | * The .rodata section needs to be read-only. Using the pfn |
277 | * catches all aliases. | 277 | * catches all aliases. |
278 | */ | 278 | */ |
279 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, | 279 | if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, |
280 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 280 | __pa_symbol(__end_rodata) >> PAGE_SHIFT)) |
281 | pgprot_val(forbidden) |= _PAGE_RW; | 281 | pgprot_val(forbidden) |= _PAGE_RW; |
282 | 282 | ||
283 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | 283 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) |
@@ -364,6 +364,37 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) | |||
364 | EXPORT_SYMBOL_GPL(lookup_address); | 364 | EXPORT_SYMBOL_GPL(lookup_address); |
365 | 365 | ||
366 | /* | 366 | /* |
367 | * This is necessary because __pa() does not work on some | ||
368 | * kinds of memory, like vmalloc() or the alloc_remap() | ||
369 | * areas on 32-bit NUMA systems. The percpu areas can | ||
370 | * end up in this kind of memory, for instance. | ||
371 | * | ||
372 | * This could be optimized, but it is only intended to be | ||
373 | * used at initialization time, and keeping it | ||
374 | * unoptimized should increase the testing coverage for | ||
375 | * the more obscure platforms. | ||
376 | */ | ||
377 | phys_addr_t slow_virt_to_phys(void *__virt_addr) | ||
378 | { | ||
379 | unsigned long virt_addr = (unsigned long)__virt_addr; | ||
380 | phys_addr_t phys_addr; | ||
381 | unsigned long offset; | ||
382 | enum pg_level level; | ||
383 | unsigned long psize; | ||
384 | unsigned long pmask; | ||
385 | pte_t *pte; | ||
386 | |||
387 | pte = lookup_address(virt_addr, &level); | ||
388 | BUG_ON(!pte); | ||
389 | psize = page_level_size(level); | ||
390 | pmask = page_level_mask(level); | ||
391 | offset = virt_addr & ~pmask; | ||
392 | phys_addr = pte_pfn(*pte) << PAGE_SHIFT; | ||
393 | return (phys_addr | offset); | ||
394 | } | ||
395 | EXPORT_SYMBOL_GPL(slow_virt_to_phys); | ||
396 | |||
397 | /* | ||
367 | * Set the new pmd in all the pgds we know about: | 398 | * Set the new pmd in all the pgds we know about: |
368 | */ | 399 | */ |
369 | static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) | 400 | static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) |
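The address math in slow_virt_to_phys() can be exercised on its own: once lookup_address() has identified the mapping level, the physical address is the PTE's pfn shifted up, OR'ed with the offset of the virtual address inside that (possibly large) page. The helper below is an illustrative reconstruction of that arithmetic, not the kernel function itself; the addresses and pfn are made up.

#include <stdio.h>

#define PAGE_SHIFT 12

/* Mirror the offset/mask arithmetic once the page-level shift is known. */
static unsigned long phys_from_pte(unsigned long virt, unsigned long pfn,
				   unsigned int level_shift)
{
	unsigned long pmask  = ~((1UL << level_shift) - 1);
	unsigned long offset = virt & ~pmask;	/* offset inside the page */

	return (pfn << PAGE_SHIFT) | offset;
}

int main(void)
{
	/* a virtual address backed by a 2M page (shift 21) whose PTE
	 * says the mapping starts at the 2M-aligned pfn 0x12200 */
	unsigned long phys = phys_from_pte(0xc0123456UL, 0x12200UL, 21);

	printf("phys = %#lx\n", phys);	/* 0x12323456 */
	return 0;
}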
@@ -396,7 +427,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
396 | pte_t new_pte, old_pte, *tmp; | 427 | pte_t new_pte, old_pte, *tmp; |
397 | pgprot_t old_prot, new_prot, req_prot; | 428 | pgprot_t old_prot, new_prot, req_prot; |
398 | int i, do_split = 1; | 429 | int i, do_split = 1; |
399 | unsigned int level; | 430 | enum pg_level level; |
400 | 431 | ||
401 | if (cpa->force_split) | 432 | if (cpa->force_split) |
402 | return 1; | 433 | return 1; |
@@ -412,15 +443,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
412 | 443 | ||
413 | switch (level) { | 444 | switch (level) { |
414 | case PG_LEVEL_2M: | 445 | case PG_LEVEL_2M: |
415 | psize = PMD_PAGE_SIZE; | ||
416 | pmask = PMD_PAGE_MASK; | ||
417 | break; | ||
418 | #ifdef CONFIG_X86_64 | 446 | #ifdef CONFIG_X86_64 |
419 | case PG_LEVEL_1G: | 447 | case PG_LEVEL_1G: |
420 | psize = PUD_PAGE_SIZE; | ||
421 | pmask = PUD_PAGE_MASK; | ||
422 | break; | ||
423 | #endif | 448 | #endif |
449 | psize = page_level_size(level); | ||
450 | pmask = page_level_mask(level); | ||
451 | break; | ||
424 | default: | 452 | default: |
425 | do_split = -EINVAL; | 453 | do_split = -EINVAL; |
426 | goto out_unlock; | 454 | goto out_unlock; |
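page_level_size()/page_level_mask() replace the open-coded PMD/PUD constants in the switch above. The sketch below is only a plausible reconstruction of what such helpers compute (the enum values and shifts here are illustrative, not copied from the x86 headers).

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PUD_SHIFT	30

enum pg_level { PG_LEVEL_4K = 1, PG_LEVEL_2M, PG_LEVEL_1G };

static unsigned long page_level_shift(enum pg_level level)
{
	switch (level) {
	case PG_LEVEL_2M: return PMD_SHIFT;
	case PG_LEVEL_1G: return PUD_SHIFT;
	default:          return PAGE_SHIFT;
	}
}

static unsigned long page_level_size(enum pg_level level)
{
	return 1UL << page_level_shift(level);
}

static unsigned long page_level_mask(enum pg_level level)
{
	return ~(page_level_size(level) - 1);
}

int main(void)
{
	printf("2M: size %#lx mask %#lx\n",
	       page_level_size(PG_LEVEL_2M), page_level_mask(PG_LEVEL_2M));
	printf("1G: size %#lx mask %#lx\n",
	       page_level_size(PG_LEVEL_1G), page_level_mask(PG_LEVEL_1G));
	return 0;
}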
@@ -551,16 +579,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
551 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) | 579 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) |
552 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); | 580 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); |
553 | 581 | ||
554 | if (address >= (unsigned long)__va(0) && | 582 | if (pfn_range_is_mapped(PFN_DOWN(__pa(address)), |
555 | address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) | 583 | PFN_DOWN(__pa(address)) + 1)) |
556 | split_page_count(level); | 584 | split_page_count(level); |
557 | 585 | ||
558 | #ifdef CONFIG_X86_64 | ||
559 | if (address >= (unsigned long)__va(1UL<<32) && | ||
560 | address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) | ||
561 | split_page_count(level); | ||
562 | #endif | ||
563 | |||
564 | /* | 586 | /* |
565 | * Install the new, split up pagetable. | 587 | * Install the new, split up pagetable. |
566 | * | 588 | * |
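pfn_range_is_mapped() asks whether a pfn range is already covered by the kernel's direct mapping, replacing the open-coded comparisons against max_low_pfn_mapped/max_pfn_mapped. The toy below captures the idea of checking a range against a table of mapped pfn ranges; the real helper takes only the start/end pfns and consults kernel state, so the signature and table here are purely illustrative.

#include <stdbool.h>
#include <stdio.h>

struct range { unsigned long start, end; };	/* pfns, end exclusive */

/* Toy range check: is [start, end) fully inside one mapped range? */
static bool toy_pfn_range_is_mapped(const struct range *tbl, int nr,
				    unsigned long start, unsigned long end)
{
	int i;

	for (i = 0; i < nr; i++)
		if (start >= tbl[i].start && end <= tbl[i].end)
			return true;
	return false;
}

int main(void)
{
	/* e.g. 0-640K and 1M-2G mapped, with a hole in between */
	struct range mapped[] = { { 0x00, 0xa0 }, { 0x100, 0x80000 } };

	printf("%d\n", toy_pfn_range_is_mapped(mapped, 2, 0xc0, 0xc1));   /* 0 */
	printf("%d\n", toy_pfn_range_is_mapped(mapped, 2, 0x200, 0x201)); /* 1 */
	return 0;
}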
@@ -729,13 +751,9 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
729 | unsigned long vaddr; | 751 | unsigned long vaddr; |
730 | int ret; | 752 | int ret; |
731 | 753 | ||
732 | if (cpa->pfn >= max_pfn_mapped) | 754 | if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1)) |
733 | return 0; | 755 | return 0; |
734 | 756 | ||
735 | #ifdef CONFIG_X86_64 | ||
736 | if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) | ||
737 | return 0; | ||
738 | #endif | ||
739 | /* | 757 | /* |
740 | * No need to redo, when the primary call touched the direct | 758 | * No need to redo, when the primary call touched the direct |
741 | * mapping already: | 759 | * mapping already: |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 0eb572eda406..2610bd93c896 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -560,10 +560,10 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) | |||
560 | { | 560 | { |
561 | unsigned long id_sz; | 561 | unsigned long id_sz; |
562 | 562 | ||
563 | if (base >= __pa(high_memory)) | 563 | if (base > __pa(high_memory-1)) |
564 | return 0; | 564 | return 0; |
565 | 565 | ||
566 | id_sz = (__pa(high_memory) < base + size) ? | 566 | id_sz = (__pa(high_memory-1) <= base + size) ? |
567 | __pa(high_memory) - base : | 567 | __pa(high_memory) - base : |
568 | size; | 568 | size; |
569 | 569 | ||
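The pat.c change is an end-exclusive comparison fix: high_memory denotes one byte past the last lowmem byte, so translating it directly names an address that is not itself part of lowmem. The standalone check below confirms that, for unsigned addresses, `base > end - 1` is the same predicate as `base >= end`, while only ever naming the last valid byte; the end value is illustrative.

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long high_pa = 0x38000000UL;	/* illustrative one-past-the-end address */
	unsigned long base;

	for (base = high_pa - 4; base < high_pa + 4; base++)
		assert((base >= high_pa) == (base > (high_pa - 1)));

	printf("base >= end and base > end-1 agree for all tested bases\n");
	return 0;
}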
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e27fbf887f3b..193350b51f90 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -334,7 +334,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, | |||
334 | if (changed && dirty) { | 334 | if (changed && dirty) { |
335 | *pmdp = entry; | 335 | *pmdp = entry; |
336 | pmd_update_defer(vma->vm_mm, address, pmdp); | 336 | pmd_update_defer(vma->vm_mm, address, pmdp); |
337 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | 337 | /* |
338 | * We had a write-protection fault here and changed the pmd | ||
339 | * to be more permissive. No need to flush the TLB for that, | ||
340 | * #PF is architecturally guaranteed to do that and in the | ||
341 | * worst-case we'll generate a spurious fault. | ||
342 | */ | ||
338 | } | 343 | } |
339 | 344 | ||
340 | return changed; | 345 | return changed; |
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index d2e2735327b4..e666cbbb9261 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/bootmem.h> | ||
1 | #include <linux/mmdebug.h> | 2 | #include <linux/mmdebug.h> |
2 | #include <linux/module.h> | 3 | #include <linux/module.h> |
3 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
@@ -8,33 +9,54 @@ | |||
8 | 9 | ||
9 | #ifdef CONFIG_X86_64 | 10 | #ifdef CONFIG_X86_64 |
10 | 11 | ||
12 | #ifdef CONFIG_DEBUG_VIRTUAL | ||
11 | unsigned long __phys_addr(unsigned long x) | 13 | unsigned long __phys_addr(unsigned long x) |
12 | { | 14 | { |
13 | if (x >= __START_KERNEL_map) { | 15 | unsigned long y = x - __START_KERNEL_map; |
14 | x -= __START_KERNEL_map; | 16 | |
15 | VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); | 17 | /* use the carry flag to determine if x was < __START_KERNEL_map */ |
16 | x += phys_base; | 18 | if (unlikely(x > y)) { |
19 | x = y + phys_base; | ||
20 | |||
21 | VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); | ||
17 | } else { | 22 | } else { |
18 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 23 | x = y + (__START_KERNEL_map - PAGE_OFFSET); |
19 | x -= PAGE_OFFSET; | 24 | |
20 | VIRTUAL_BUG_ON(!phys_addr_valid(x)); | 25 | /* carry flag will be set if starting x was >= PAGE_OFFSET */ |
26 | VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); | ||
21 | } | 27 | } |
28 | |||
22 | return x; | 29 | return x; |
23 | } | 30 | } |
24 | EXPORT_SYMBOL(__phys_addr); | 31 | EXPORT_SYMBOL(__phys_addr); |
25 | 32 | ||
33 | unsigned long __phys_addr_symbol(unsigned long x) | ||
34 | { | ||
35 | unsigned long y = x - __START_KERNEL_map; | ||
36 | |||
37 | /* only check upper bounds since lower bounds will trigger carry */ | ||
38 | VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); | ||
39 | |||
40 | return y + phys_base; | ||
41 | } | ||
42 | EXPORT_SYMBOL(__phys_addr_symbol); | ||
43 | #endif | ||
44 | |||
26 | bool __virt_addr_valid(unsigned long x) | 45 | bool __virt_addr_valid(unsigned long x) |
27 | { | 46 | { |
28 | if (x >= __START_KERNEL_map) { | 47 | unsigned long y = x - __START_KERNEL_map; |
29 | x -= __START_KERNEL_map; | 48 | |
30 | if (x >= KERNEL_IMAGE_SIZE) | 49 | /* use the carry flag to determine if x was < __START_KERNEL_map */ |
50 | if (unlikely(x > y)) { | ||
51 | x = y + phys_base; | ||
52 | |||
53 | if (y >= KERNEL_IMAGE_SIZE) | ||
31 | return false; | 54 | return false; |
32 | x += phys_base; | ||
33 | } else { | 55 | } else { |
34 | if (x < PAGE_OFFSET) | 56 | x = y + (__START_KERNEL_map - PAGE_OFFSET); |
35 | return false; | 57 | |
36 | x -= PAGE_OFFSET; | 58 | /* carry flag will be set if starting x was >= PAGE_OFFSET */ |
37 | if (!phys_addr_valid(x)) | 59 | if ((x > y) || !phys_addr_valid(x)) |
38 | return false; | 60 | return false; |
39 | } | 61 | } |
40 | 62 | ||
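The rewritten __phys_addr() folds the range check into the offset subtraction: if x was below __START_KERNEL_map, the unsigned subtraction borrows (wraps), so the result y ends up larger than x; if not, y is both smaller than x and the offset into the kernel image. The standalone demo below, with an illustrative START value and 64-bit arithmetic, shows the single comparison doing both jobs. The same trick is reused in __virt_addr_valid() above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000ULL	/* illustrative value */

/* One subtraction does double duty: y is the kernel-image offset when x
 * really is a kernel-image address, and the absence of a borrow (x > y)
 * tells us that x >= START_KERNEL_MAP in the first place. */
static bool in_kernel_image_map(uint64_t x)
{
	uint64_t y = x - START_KERNEL_MAP;

	return x > y;	/* no wraparound -> x was >= START_KERNEL_MAP */
}

int main(void)
{
	printf("%d\n", in_kernel_image_map(0xffffffff81000000ULL)); /* 1: kernel image */
	printf("%d\n", in_kernel_image_map(0xffff880000001000ULL)); /* 0: direct map   */
	return 0;
}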
@@ -47,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid); | |||
47 | #ifdef CONFIG_DEBUG_VIRTUAL | 69 | #ifdef CONFIG_DEBUG_VIRTUAL |
48 | unsigned long __phys_addr(unsigned long x) | 70 | unsigned long __phys_addr(unsigned long x) |
49 | { | 71 | { |
72 | unsigned long phys_addr = x - PAGE_OFFSET; | ||
50 | /* VMALLOC_* aren't constants */ | 73 | /* VMALLOC_* aren't constants */ |
51 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 74 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); |
52 | VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); | 75 | VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); |
53 | return x - PAGE_OFFSET; | 76 | /* max_low_pfn is set early, but not _that_ early */ |
77 | if (max_low_pfn) { | ||
78 | VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); | ||
79 | BUG_ON(slow_virt_to_phys((void *)x) != phys_addr); | ||
80 | } | ||
81 | return phys_addr; | ||
54 | } | 82 | } |
55 | EXPORT_SYMBOL(__phys_addr); | 83 | EXPORT_SYMBOL(__phys_addr); |
56 | #endif | 84 | #endif |