-rw-r--r--  arch/x86/Kconfig                  | 14
-rw-r--r--  arch/x86/include/asm/elf.h        |  1
-rw-r--r--  arch/x86/kernel/cpu/amd.c         |  4
-rw-r--r--  arch/x86/kernel/devicetree.c      |  4
-rw-r--r--  arch/x86/kernel/e820.c            |  2
-rw-r--r--  arch/x86/kernel/setup.c           |  8
-rw-r--r--  arch/x86/kernel/sys_x86_64.c      | 30
-rw-r--r--  arch/x86/mm/init.c                | 68
-rw-r--r--  arch/x86/mm/init_64.c             | 14
-rw-r--r--  arch/x86/mm/pageattr.c            |  4
-rw-r--r--  arch/x86/mm/pat.c                 |  6
-rw-r--r--  arch/x86/mm/pgtable.c             | 81
-rw-r--r--  arch/x86/platform/efi/efi-bgrt.c  |  4
14 files changed, 189 insertions(+), 89 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 867bc5bea8dc..faff6934c05a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1295,14 +1295,14 @@ config ARCH_DMA_ADDR_T_64BIT
 	def_bool y
 	depends on X86_64 || HIGHMEM64G
 
-config DIRECT_GBPAGES
-	bool "Enable 1GB pages for kernel pagetables" if EXPERT
-	default y
-	depends on X86_64
+config X86_DIRECT_GBPAGES
+	def_bool y
+	depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
 	---help---
-	  Allow the kernel linear mapping to use 1GB pages on CPUs that
-	  support it. This can improve the kernel's performance a tiny bit by
-	  reducing TLB pressure. If in doubt, say "Y".
+	  Certain kernel features effectively disable kernel
+	  linear 1 GB mappings (even if the CPU otherwise
+	  supports them), so don't confuse the user by printing
+	  that we have them enabled.
 
 # Common NUMA Features
 config NUMA
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 3563107b5060..935588d95c82 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -366,6 +366,7 @@ enum align_flags {
 struct va_alignment {
 	int flags;
 	unsigned long mask;
+	unsigned long bits;
 } ____cacheline_aligned;
 
 extern struct va_alignment va_align;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index dd9e50500297..fd470ebf924e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -488,6 +489,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 		va_align.mask = (upperbit - 1) & PAGE_MASK;
 		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+
+		/* A random value per boot for bit slice [12:upper_bit) */
+		va_align.bits = get_random_int() & va_align.mask;
 	}
 }
 
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3d3503351242..6367a780cc8c 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -286,13 +286,13 @@ static void __init x86_flattree_get_config(void)
 	initial_boot_params = dt = early_memremap(initial_dtb, map_len);
 	size = of_get_flat_dt_size();
 	if (map_len < size) {
-		early_iounmap(dt, map_len);
+		early_memunmap(dt, map_len);
 		initial_boot_params = dt = early_memremap(initial_dtb, size);
 		map_len = size;
 	}
 
 	unflatten_and_copy_device_tree();
-	early_iounmap(dt, map_len);
+	early_memunmap(dt, map_len);
 }
 #else
 static inline void x86_flattree_get_config(void) { }
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 46201deee923..7d46bb260334 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -661,7 +661,7 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len)
 	extmap = (struct e820entry *)(sdata->data);
 	__append_e820_map(extmap, entries);
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-	early_iounmap(sdata, data_len);
+	early_memunmap(sdata, data_len);
 	printk(KERN_INFO "e820: extended physical RAM map:\n");
 	e820_print_map("extended");
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 014466b152b5..d74ac33290ae 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -354,7 +354,7 @@ static void __init relocate_initrd(void)
 		mapaddr = ramdisk_image & PAGE_MASK;
 		p = early_memremap(mapaddr, clen+slop);
 		memcpy(q, p+slop, clen);
-		early_iounmap(p, clen+slop);
+		early_memunmap(p, clen+slop);
 		q += clen;
 		ramdisk_image += clen;
 		ramdisk_size -= clen;
@@ -438,7 +438,7 @@ static void __init parse_setup_data(void)
 		data_len = data->len + sizeof(struct setup_data);
 		data_type = data->type;
 		pa_next = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 
 		switch (data_type) {
 		case SETUP_E820_EXT:
@@ -470,7 +470,7 @@ static void __init e820_reserve_setup_data(void)
 				  E820_RAM, E820_RESERVED_KERN);
 		found = 1;
 		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 	}
 	if (!found)
 		return;
@@ -491,7 +491,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 		data = early_memremap(pa_data, sizeof(*data));
 		memblock_reserve(pa_data, sizeof(*data) + data->len);
 		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 	}
 }
 
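A note on the conversions above: early_memremap()/early_memunmap() are the pair for temporarily mapping ordinary RAM during early boot, while early_ioremap()/early_iounmap() stay reserved for MMIO. A minimal sketch of the pattern the converted call sites follow; example_read_setup_data() is a hypothetical helper written for this note, not part of the patch:

static void __init example_read_setup_data(u64 pa)	/* hypothetical helper */
{
	struct setup_data *data;

	/* map just the header out of normal RAM ... */
	data = early_memremap(pa, sizeof(*data));
	if (!data)
		return;

	/* ... consume the fields here ... */

	/* ... then drop the temporary early mapping again */
	early_memunmap(data, sizeof(*data));
}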
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 30277e27431a..10e0272d789a 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -34,10 +34,26 @@ static unsigned long get_align_mask(void)
 	return va_align.mask;
 }
 
+/*
+ * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
+ * va_align.bits, [12:upper_bit), are set to a random value instead of
+ * zeroing them. This random value is computed once per boot. This form
+ * of ASLR is known as "per-boot ASLR".
+ *
+ * To achieve this, the random value is added to the info.align_offset
+ * value before calling vm_unmapped_area() or ORed directly to the
+ * address.
+ */
+static unsigned long get_align_bits(void)
+{
+	return va_align.bits & get_align_mask();
+}
+
 unsigned long align_vdso_addr(unsigned long addr)
 {
 	unsigned long align_mask = get_align_mask();
-	return (addr + align_mask) & ~align_mask;
+	addr = (addr + align_mask) & ~align_mask;
+	return addr | get_align_bits();
 }
 
 static int __init control_va_addr_alignment(char *str)
@@ -135,8 +151,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.length = len;
 	info.low_limit = begin;
 	info.high_limit = end;
-	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
+	if (filp) {
+		info.align_mask = get_align_mask();
+		info.align_offset += get_align_bits();
+	}
 	return vm_unmapped_area(&info);
 }
 
@@ -174,8 +194,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	info.length = len;
 	info.low_limit = PAGE_SIZE;
 	info.high_limit = mm->mmap_base;
-	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
+	if (filp) {
+		info.align_mask = get_align_mask();
+		info.align_offset += get_align_bits();
+	}
 	addr = vm_unmapped_area(&info);
 	if (!(addr & ~PAGE_MASK))
 		return addr;
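The comment introduced above describes the per-boot ASLR arithmetic: round the address up to the alignment boundary, then OR the per-boot random slice into bits [12:upper_bit). A standalone user-space sketch of that arithmetic, assuming a 64 KiB aliasing window and a fixed constant standing in for get_random_int():

#include <stdio.h>

#define PAGE_MASK	(~0xfffUL)

int main(void)
{
	unsigned long upperbit = 1UL << 16;			/* assumed I$-aliasing window */
	unsigned long mask     = (upperbit - 1) & PAGE_MASK;	/* bits [12:16) -> 0xf000 */
	unsigned long rnd      = 0xdeadbeefUL & mask;		/* stand-in for the per-boot value */
	unsigned long addr     = 0x7f0000123456UL;

	/* what align_vdso_addr() effectively does: round up, then OR in the slice */
	unsigned long aligned  = ((addr + mask) & ~mask) | rnd;

	printf("mask=%#lx rnd=%#lx -> %#lx\n", mask, rnd, aligned);
	return 0;
}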
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 52417e771af9..1d553186c434 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,29 +29,33 @@
 
 /*
  * Tables translating between page_cache_type_t and pte encoding.
- * Minimal supported modes are defined statically, modified if more supported
- * cache modes are available.
- * Index into __cachemode2pte_tbl is the cachemode.
- * Index into __pte2cachemode_tbl are the caching attribute bits of the pte
- * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
+ *
+ * Minimal supported modes are defined statically, they are modified
+ * during bootup if more supported cache modes are available.
+ *
+ * Index into __cachemode2pte_tbl[] is the cachemode.
+ *
+ * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
  */
 uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
-	[_PAGE_CACHE_MODE_WB]		= 0,
-	[_PAGE_CACHE_MODE_WC]		= _PAGE_PWT,
-	[_PAGE_CACHE_MODE_UC_MINUS]	= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_UC]		= _PAGE_PCD | _PAGE_PWT,
-	[_PAGE_CACHE_MODE_WT]		= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_WP]		= _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WB      ]	= 0         | 0        ,
+	[_PAGE_CACHE_MODE_WC      ]	= _PAGE_PWT | 0        ,
+	[_PAGE_CACHE_MODE_UC_MINUS]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_UC      ]	= _PAGE_PWT | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WT      ]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WP      ]	= 0         | _PAGE_PCD,
 };
 EXPORT_SYMBOL(__cachemode2pte_tbl);
+
 uint8_t __pte2cachemode_tbl[8] = {
-	[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
-	[__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
+	[__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC,
+	[__pte2cm_idx( 0        | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx( 0        | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
 EXPORT_SYMBOL(__pte2cachemode_tbl);
@@ -131,21 +135,7 @@ void __init early_alloc_pgt_buf(void)
 
 int after_bootmem;
 
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
-	= 1
-#endif
-;
-
-static void __init init_gbpages(void)
-{
-#ifdef CONFIG_X86_64
-	if (direct_gbpages && cpu_has_gbpages)
-		printk(KERN_INFO "Using GB pages for direct mapping\n");
-	else
-		direct_gbpages = 0;
-#endif
-}
+early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
 
 struct map_range {
 	unsigned long start;
@@ -157,16 +147,12 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-	init_gbpages();
-
 #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (direct_gbpages)
-		page_size_mask |= 1 << PG_LEVEL_1G;
 	if (cpu_has_pse)
 		page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
@@ -181,6 +167,14 @@ static void __init probe_page_size_mask(void)
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	} else
 		__supported_pte_mask &= ~_PAGE_GLOBAL;
+
+	/* Enable 1 GB linear kernel mappings if available: */
+	if (direct_gbpages && cpu_has_gbpages) {
+		printk(KERN_INFO "Using GB pages for direct mapping\n");
+		page_size_mask |= 1 << PG_LEVEL_1G;
+	} else {
+		direct_gbpages = 0;
+	}
 }
 
 #ifdef CONFIG_X86_32
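The rewritten comment block above documents how the caching attribute bits of a pte are collapsed into a 3-bit table index (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT at index positions 0, 1, 2). A standalone illustration of that packing; pte_cache_bits_to_index() is a stand-in written for this note, not the kernel's __pte2cm_idx() macro, and the bit positions assumed are the standard x86 pte ones (PWT=3, PCD=4, PAT=7):

#include <stdio.h>

#define _PAGE_PWT	(1UL << 3)
#define _PAGE_PCD	(1UL << 4)
#define _PAGE_PAT	(1UL << 7)

/* hypothetical helper mirroring what the comment describes */
static unsigned int pte_cache_bits_to_index(unsigned long prot)
{
	return (!!(prot & _PAGE_PWT) << 0) |
	       (!!(prot & _PAGE_PCD) << 1) |
	       (!!(prot & _PAGE_PAT) << 2);
}

int main(void)
{
	/* _PAGE_PWT | _PAGE_PCD (the UC entry in the table above) -> index 3 */
	printf("%u\n", pte_cache_bits_to_index(_PAGE_PWT | _PAGE_PCD));
	return 0;
}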
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30eb05ae7061..3fba623e3ba5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	return 0;
 }
 
-static int __init parse_direct_gbpages_off(char *arg)
-{
-	direct_gbpages = 0;
-	return 0;
-}
-early_param("nogbpages", parse_direct_gbpages_off);
-
-static int __init parse_direct_gbpages_on(char *arg)
-{
-	direct_gbpages = 1;
-	return 0;
-}
-early_param("gbpages", parse_direct_gbpages_on);
-
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 536ea2fb6e33..89af288ec674 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m)
 	seq_printf(m, "DirectMap4M: %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 12);
 #endif
-#ifdef CONFIG_X86_64
 	if (direct_gbpages)
 		seq_printf(m, "DirectMap1G: %8lu kB\n",
 			direct_pages_count[PG_LEVEL_1G] << 20);
-#endif
 }
 #else
 static inline void split_page_count(int level) { }
@@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages)
 {
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7ac68698406c..35af6771a95a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 
 #ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 
 	while (cursor < to) {
 		if (!devmem_is_allowed(pfn)) {
-			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
+			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
 				current->comm, from, to - 1);
 			return 0;
 		}
 		cursor += PAGE_SIZE;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7b22adaad4f1..5a7e5252c878 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -275,12 +275,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 	}
 }
 
+/*
+ * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
+ * assumes that pgd should be in one page.
+ *
+ * But kernel with PAE paging that is not running as a Xen domain
+ * only needs to allocate 32 bytes for pgd instead of one page.
+ */
+#ifdef CONFIG_X86_PAE
+
+#include <linux/slab.h>
+
+#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_ALIGN	32
+
+static struct kmem_cache *pgd_cache;
+
+static int __init pgd_cache_init(void)
+{
+	/*
+	 * When PAE kernel is running as a Xen domain, it does not use
+	 * shared kernel pmd. And this requires a whole page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return 0;
+
+	/*
+	 * when PAE kernel is not running as a Xen domain, it uses
+	 * shared kernel pmd. Shared kernel pmd does not require a whole
+	 * page for pgd. We are able to just allocate a 32-byte for pgd.
+	 * During boot time, we create a 32-byte slab for pgd table allocation.
+	 */
+	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
+				      SLAB_PANIC, NULL);
+	if (!pgd_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+core_initcall(pgd_cache_init);
+
+static inline pgd_t *_pgd_alloc(void)
+{
+	/*
+	 * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
+	 * We allocate one page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return (pgd_t *)__get_free_page(PGALLOC_GFP);
+
+	/*
+	 * Now PAE kernel is not running as a Xen domain. We can allocate
+	 * a 32-byte slab for pgd to save memory space.
+	 */
+	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	if (!SHARED_KERNEL_PMD)
+		free_page((unsigned long)pgd);
+	else
+		kmem_cache_free(pgd_cache, pgd);
+}
+#else
+static inline pgd_t *_pgd_alloc(void)
+{
+	return (pgd_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+#endif /* CONFIG_X86_PAE */
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
 
-	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+	pgd = _pgd_alloc();
 
 	if (pgd == NULL)
 		goto out;
@@ -310,7 +385,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 out_free_pmds:
 	free_pmds(mm, pmds);
 out_free_pgd:
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 out:
 	return NULL;
 }
@@ -320,7 +395,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
 	paravirt_pgd_free(mm, pgd);
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 }
 
 /*
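For context on the 32-byte figure in the new comment: with PAE the pgd holds PTRS_PER_PGD = 4 entries of sizeof(pgd_t) = 8 bytes, so PGD_SIZE = 4 * 8 = 32 bytes. That is why the kmem cache is created with 32-byte objects and 32-byte alignment, instead of spending a whole 4 KiB page per pgd whenever the kernel pmd is shared (i.e. when not running as a Xen domain).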
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d143d216d52b..d7f997f7c26d 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -67,7 +67,7 @@ void __init efi_bgrt_init(void)
 
 	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
 	if (!image) {
-		image = early_memremap(bgrt_tab->image_address,
+		image = early_ioremap(bgrt_tab->image_address,
 				       sizeof(bmp_header));
 		ioremapped = true;
 		if (!image) {
@@ -89,7 +89,7 @@ void __init efi_bgrt_init(void)
 	}
 
 	if (ioremapped) {
-		image = early_memremap(bgrt_tab->image_address,
+		image = early_ioremap(bgrt_tab->image_address,
 				       bmp_header.size);
 		if (!image) {
 			pr_err("Ignoring BGRT: failed to map image memory\n");
diff --git a/include/linux/init.h b/include/linux/init.h
index 2df8e8dd10a4..21b6d768edd7 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -253,21 +253,41 @@ struct obs_kernel_param {
  * obs_kernel_param "array" too far apart in .init.setup.
  */
 #define __setup_param(str, unique_id, fn, early)			\
 	static const char __setup_str_##unique_id[] __initconst	\
 		__aligned(1) = str;					\
 	static struct obs_kernel_param __setup_##unique_id		\
 		__used __section(.init.setup)				\
 		__attribute__((aligned((sizeof(long)))))		\
 		= { __setup_str_##unique_id, fn, early }
 
 #define __setup(str, fn)						\
 	__setup_param(str, fn, fn, 0)
 
-/* NOTE: fn is as per module_param, not __setup! Emits warning if fn
- * returns non-zero. */
-#define early_param(str, fn) \
+/*
+ * NOTE: fn is as per module_param, not __setup!
+ * Emits warning if fn returns non-zero.
+ */
+#define early_param(str, fn)						\
 	__setup_param(str, fn, fn, 1)
 
+#define early_param_on_off(str_on, str_off, var, config)		\
+									\
+	int var = IS_ENABLED(config);					\
+									\
+	static int __init parse_##var##_on(char *arg)			\
+	{								\
+		var = 1;						\
+		return 0;						\
+	}								\
+	__setup_param(str_on, parse_##var##_on, parse_##var##_on, 1);	\
+									\
+	static int __init parse_##var##_off(char *arg)			\
+	{								\
+		var = 0;						\
+		return 0;						\
+	}								\
+	__setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
+
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
 void __init parse_early_options(char *cmdline);
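For reference, the early_param_on_off() invocation added in the arch/x86/mm/init.c hunk expands (modulo whitespace) roughly to the following, which is what replaces the hand-rolled parse_direct_gbpages_on()/parse_direct_gbpages_off() handlers removed from init_64.c:

int direct_gbpages = IS_ENABLED(CONFIG_X86_DIRECT_GBPAGES);

static int __init parse_direct_gbpages_on(char *arg)
{
	direct_gbpages = 1;
	return 0;
}
__setup_param("gbpages", parse_direct_gbpages_on, parse_direct_gbpages_on, 1);

static int __init parse_direct_gbpages_off(char *arg)
{
	direct_gbpages = 0;
	return 0;
}
__setup_param("nogbpages", parse_direct_gbpages_off, parse_direct_gbpages_off, 1);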