diff options
Diffstat (limited to 'arch/x86')
47 files changed, 1250 insertions, 783 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a494fa34713a..7bb15747fea2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1921,54 +1921,38 @@ config RELOCATABLE | |||
1921 | (CONFIG_PHYSICAL_START) is used as the minimum location. | 1921 | (CONFIG_PHYSICAL_START) is used as the minimum location. |
1922 | 1922 | ||
1923 | config RANDOMIZE_BASE | 1923 | config RANDOMIZE_BASE |
1924 | bool "Randomize the address of the kernel image" | 1924 | bool "Randomize the address of the kernel image (KASLR)" |
1925 | depends on RELOCATABLE | 1925 | depends on RELOCATABLE |
1926 | default n | 1926 | default n |
1927 | ---help--- | 1927 | ---help--- |
1928 | Randomizes the physical and virtual address at which the | 1928 | In support of Kernel Address Space Layout Randomization (KASLR), |
1929 | kernel image is decompressed, as a security feature that | 1929 | this randomizes the physical address at which the kernel image |
1930 | deters exploit attempts relying on knowledge of the location | 1930 | is decompressed and the virtual address where the kernel |
1931 | of kernel internals. | 1931 | image is mapped, as a security feature that deters exploit |
1932 | attempts relying on knowledge of the location of kernel | ||
1933 | code internals. | ||
1934 | |||
1935 | The kernel physical and virtual address can be randomized | ||
1936 | from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that | ||
1937 | using RANDOMIZE_BASE reduces the memory space available to | ||
1938 | kernel modules from 1.5GB to 1GB.) | ||
1939 | |||
1940 | Entropy is generated using the RDRAND instruction if it is | ||
1941 | supported. If RDTSC is supported, its value is mixed into | ||
1942 | the entropy pool as well. If neither RDRAND nor RDTSC are | ||
1943 | supported, then entropy is read from the i8254 timer. | ||
1944 | |||
1945 | Since the kernel is built using 2GB addressing, and | ||
1946 | PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of | ||
1947 | entropy is theoretically possible. Currently, with the | ||
1948 | default value for PHYSICAL_ALIGN and due to page table | ||
1949 | layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits. | ||
1950 | |||
1951 | If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot | ||
1952 | time. To enable it, boot with "kaslr" on the kernel command | ||
1953 | line (which will also disable hibernation). | ||
1932 | 1954 | ||
1933 | Entropy is generated using the RDRAND instruction if it is | 1955 | If unsure, say N. |
1934 | supported. If RDTSC is supported, it is used as well. If | ||
1935 | neither RDRAND nor RDTSC are supported, then randomness is | ||
1936 | read from the i8254 timer. | ||
1937 | |||
1938 | The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, | ||
1939 | and aligned according to PHYSICAL_ALIGN. Since the kernel is | ||
1940 | built using 2GiB addressing, and PHYSICAL_ALGIN must be at a | ||
1941 | minimum of 2MiB, only 10 bits of entropy is theoretically | ||
1942 | possible. At best, due to page table layouts, 64-bit can use | ||
1943 | 9 bits of entropy and 32-bit uses 8 bits. | ||
1944 | |||
1945 | If unsure, say N. | ||
1946 | |||
1947 | config RANDOMIZE_BASE_MAX_OFFSET | ||
1948 | hex "Maximum kASLR offset allowed" if EXPERT | ||
1949 | depends on RANDOMIZE_BASE | ||
1950 | range 0x0 0x20000000 if X86_32 | ||
1951 | default "0x20000000" if X86_32 | ||
1952 | range 0x0 0x40000000 if X86_64 | ||
1953 | default "0x40000000" if X86_64 | ||
1954 | ---help--- | ||
1955 | The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical | ||
1956 | memory is used to determine the maximal offset in bytes that will | ||
1957 | be applied to the kernel when kernel Address Space Layout | ||
1958 | Randomization (kASLR) is active. This must be a multiple of | ||
1959 | PHYSICAL_ALIGN. | ||
1960 | |||
1961 | On 32-bit this is limited to 512MiB by page table layouts. The | ||
1962 | default is 512MiB. | ||
1963 | |||
1964 | On 64-bit this is limited by how the kernel fixmap page table is | ||
1965 | positioned, so this cannot be larger than 1GiB currently. Without | ||
1966 | RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel | ||
1967 | and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the | ||
1968 | modules area will shrink to compensate, up to the current maximum | ||
1969 | 1GiB to 1GiB split. The default is 1GiB. | ||
1970 | |||
1971 | If unsure, leave at the default value. | ||
1972 | 1956 | ||
1973 | # Relocation on x86 needs some additional build support | 1957 | # Relocation on x86 needs some additional build support |
1974 | config X86_NEED_RELOCS | 1958 | config X86_NEED_RELOCS |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4086abca0b32..6fce7f096b88 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -208,7 +208,8 @@ endif | |||
208 | 208 | ||
209 | head-y := arch/x86/kernel/head_$(BITS).o | 209 | head-y := arch/x86/kernel/head_$(BITS).o |
210 | head-y += arch/x86/kernel/head$(BITS).o | 210 | head-y += arch/x86/kernel/head$(BITS).o |
211 | head-y += arch/x86/kernel/head.o | 211 | head-y += arch/x86/kernel/ebda.o |
212 | head-y += arch/x86/kernel/platform-quirks.o | ||
212 | 213 | ||
213 | libs-y += arch/x86/lib/ | 214 | libs-y += arch/x86/lib/ |
214 | 215 | ||
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b1ef9e489084..700a9c6e6159 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile | |||
@@ -86,16 +86,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE | |||
86 | 86 | ||
87 | SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) | 87 | SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) |
88 | 88 | ||
89 | sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p' | 89 | sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' |
90 | |||
91 | quiet_cmd_voffset = VOFFSET $@ | ||
92 | cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ | ||
93 | |||
94 | targets += voffset.h | ||
95 | $(obj)/voffset.h: vmlinux FORCE | ||
96 | $(call if_changed,voffset) | ||
97 | |||
98 | sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' | ||
99 | 90 | ||
100 | quiet_cmd_zoffset = ZOFFSET $@ | 91 | quiet_cmd_zoffset = ZOFFSET $@ |
101 | cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ | 92 | cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ |
@@ -106,7 +97,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE | |||
106 | 97 | ||
107 | 98 | ||
108 | AFLAGS_header.o += -I$(obj) | 99 | AFLAGS_header.o += -I$(obj) |
109 | $(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h | 100 | $(obj)/header.o: $(obj)/zoffset.h |
110 | 101 | ||
111 | LDFLAGS_setup.elf := -T | 102 | LDFLAGS_setup.elf := -T |
112 | $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE | 103 | $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE |
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 8774cb23064f..cfdd8c3f8af2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile | |||
@@ -57,12 +57,27 @@ LDFLAGS_vmlinux := -T | |||
57 | hostprogs-y := mkpiggy | 57 | hostprogs-y := mkpiggy |
58 | HOST_EXTRACFLAGS += -I$(srctree)/tools/include | 58 | HOST_EXTRACFLAGS += -I$(srctree)/tools/include |
59 | 59 | ||
60 | sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' | ||
61 | |||
62 | quiet_cmd_voffset = VOFFSET $@ | ||
63 | cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ | ||
64 | |||
65 | targets += ../voffset.h | ||
66 | |||
67 | $(obj)/../voffset.h: vmlinux FORCE | ||
68 | $(call if_changed,voffset) | ||
69 | |||
70 | $(obj)/misc.o: $(obj)/../voffset.h | ||
71 | |||
60 | vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ | 72 | vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ |
61 | $(obj)/string.o $(obj)/cmdline.o \ | 73 | $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \ |
62 | $(obj)/piggy.o $(obj)/cpuflags.o | 74 | $(obj)/piggy.o $(obj)/cpuflags.o |
63 | 75 | ||
64 | vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o | 76 | vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o |
65 | vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o | 77 | vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o |
78 | ifdef CONFIG_X86_64 | ||
79 | vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o | ||
80 | endif | ||
66 | 81 | ||
67 | $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone | 82 | $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone |
68 | 83 | ||
@@ -109,10 +124,8 @@ suffix-$(CONFIG_KERNEL_XZ) := xz | |||
109 | suffix-$(CONFIG_KERNEL_LZO) := lzo | 124 | suffix-$(CONFIG_KERNEL_LZO) := lzo |
110 | suffix-$(CONFIG_KERNEL_LZ4) := lz4 | 125 | suffix-$(CONFIG_KERNEL_LZ4) := lz4 |
111 | 126 | ||
112 | RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ | ||
113 | $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) | ||
114 | quiet_cmd_mkpiggy = MKPIGGY $@ | 127 | quiet_cmd_mkpiggy = MKPIGGY $@ |
115 | cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) | 128 | cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) |
116 | 129 | ||
117 | targets += piggy.S | 130 | targets += piggy.S |
118 | $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE | 131 | $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE |
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c deleted file mode 100644 index 6a9b96b4624d..000000000000 --- a/arch/x86/boot/compressed/aslr.c +++ /dev/null | |||
@@ -1,339 +0,0 @@ | |||
1 | #include "misc.h" | ||
2 | |||
3 | #include <asm/msr.h> | ||
4 | #include <asm/archrandom.h> | ||
5 | #include <asm/e820.h> | ||
6 | |||
7 | #include <generated/compile.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/uts.h> | ||
10 | #include <linux/utsname.h> | ||
11 | #include <generated/utsrelease.h> | ||
12 | |||
13 | /* Simplified build-specific string for starting entropy. */ | ||
14 | static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" | ||
15 | LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; | ||
16 | |||
17 | #define I8254_PORT_CONTROL 0x43 | ||
18 | #define I8254_PORT_COUNTER0 0x40 | ||
19 | #define I8254_CMD_READBACK 0xC0 | ||
20 | #define I8254_SELECT_COUNTER0 0x02 | ||
21 | #define I8254_STATUS_NOTREADY 0x40 | ||
22 | static inline u16 i8254(void) | ||
23 | { | ||
24 | u16 status, timer; | ||
25 | |||
26 | do { | ||
27 | outb(I8254_PORT_CONTROL, | ||
28 | I8254_CMD_READBACK | I8254_SELECT_COUNTER0); | ||
29 | status = inb(I8254_PORT_COUNTER0); | ||
30 | timer = inb(I8254_PORT_COUNTER0); | ||
31 | timer |= inb(I8254_PORT_COUNTER0) << 8; | ||
32 | } while (status & I8254_STATUS_NOTREADY); | ||
33 | |||
34 | return timer; | ||
35 | } | ||
36 | |||
37 | static unsigned long rotate_xor(unsigned long hash, const void *area, | ||
38 | size_t size) | ||
39 | { | ||
40 | size_t i; | ||
41 | unsigned long *ptr = (unsigned long *)area; | ||
42 | |||
43 | for (i = 0; i < size / sizeof(hash); i++) { | ||
44 | /* Rotate by odd number of bits and XOR. */ | ||
45 | hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); | ||
46 | hash ^= ptr[i]; | ||
47 | } | ||
48 | |||
49 | return hash; | ||
50 | } | ||
51 | |||
52 | /* Attempt to create a simple but unpredictable starting entropy. */ | ||
53 | static unsigned long get_random_boot(void) | ||
54 | { | ||
55 | unsigned long hash = 0; | ||
56 | |||
57 | hash = rotate_xor(hash, build_str, sizeof(build_str)); | ||
58 | hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); | ||
59 | |||
60 | return hash; | ||
61 | } | ||
62 | |||
63 | static unsigned long get_random_long(void) | ||
64 | { | ||
65 | #ifdef CONFIG_X86_64 | ||
66 | const unsigned long mix_const = 0x5d6008cbf3848dd3UL; | ||
67 | #else | ||
68 | const unsigned long mix_const = 0x3f39e593UL; | ||
69 | #endif | ||
70 | unsigned long raw, random = get_random_boot(); | ||
71 | bool use_i8254 = true; | ||
72 | |||
73 | debug_putstr("KASLR using"); | ||
74 | |||
75 | if (has_cpuflag(X86_FEATURE_RDRAND)) { | ||
76 | debug_putstr(" RDRAND"); | ||
77 | if (rdrand_long(&raw)) { | ||
78 | random ^= raw; | ||
79 | use_i8254 = false; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | if (has_cpuflag(X86_FEATURE_TSC)) { | ||
84 | debug_putstr(" RDTSC"); | ||
85 | raw = rdtsc(); | ||
86 | |||
87 | random ^= raw; | ||
88 | use_i8254 = false; | ||
89 | } | ||
90 | |||
91 | if (use_i8254) { | ||
92 | debug_putstr(" i8254"); | ||
93 | random ^= i8254(); | ||
94 | } | ||
95 | |||
96 | /* Circular multiply for better bit diffusion */ | ||
97 | asm("mul %3" | ||
98 | : "=a" (random), "=d" (raw) | ||
99 | : "a" (random), "rm" (mix_const)); | ||
100 | random += raw; | ||
101 | |||
102 | debug_putstr("...\n"); | ||
103 | |||
104 | return random; | ||
105 | } | ||
106 | |||
107 | struct mem_vector { | ||
108 | unsigned long start; | ||
109 | unsigned long size; | ||
110 | }; | ||
111 | |||
112 | #define MEM_AVOID_MAX 5 | ||
113 | static struct mem_vector mem_avoid[MEM_AVOID_MAX]; | ||
114 | |||
115 | static bool mem_contains(struct mem_vector *region, struct mem_vector *item) | ||
116 | { | ||
117 | /* Item at least partially before region. */ | ||
118 | if (item->start < region->start) | ||
119 | return false; | ||
120 | /* Item at least partially after region. */ | ||
121 | if (item->start + item->size > region->start + region->size) | ||
122 | return false; | ||
123 | return true; | ||
124 | } | ||
125 | |||
126 | static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) | ||
127 | { | ||
128 | /* Item one is entirely before item two. */ | ||
129 | if (one->start + one->size <= two->start) | ||
130 | return false; | ||
131 | /* Item one is entirely after item two. */ | ||
132 | if (one->start >= two->start + two->size) | ||
133 | return false; | ||
134 | return true; | ||
135 | } | ||
136 | |||
137 | static void mem_avoid_init(unsigned long input, unsigned long input_size, | ||
138 | unsigned long output, unsigned long output_size) | ||
139 | { | ||
140 | u64 initrd_start, initrd_size; | ||
141 | u64 cmd_line, cmd_line_size; | ||
142 | unsigned long unsafe, unsafe_len; | ||
143 | char *ptr; | ||
144 | |||
145 | /* | ||
146 | * Avoid the region that is unsafe to overlap during | ||
147 | * decompression (see calculations at top of misc.c). | ||
148 | */ | ||
149 | unsafe_len = (output_size >> 12) + 32768 + 18; | ||
150 | unsafe = (unsigned long)input + input_size - unsafe_len; | ||
151 | mem_avoid[0].start = unsafe; | ||
152 | mem_avoid[0].size = unsafe_len; | ||
153 | |||
154 | /* Avoid initrd. */ | ||
155 | initrd_start = (u64)real_mode->ext_ramdisk_image << 32; | ||
156 | initrd_start |= real_mode->hdr.ramdisk_image; | ||
157 | initrd_size = (u64)real_mode->ext_ramdisk_size << 32; | ||
158 | initrd_size |= real_mode->hdr.ramdisk_size; | ||
159 | mem_avoid[1].start = initrd_start; | ||
160 | mem_avoid[1].size = initrd_size; | ||
161 | |||
162 | /* Avoid kernel command line. */ | ||
163 | cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; | ||
164 | cmd_line |= real_mode->hdr.cmd_line_ptr; | ||
165 | /* Calculate size of cmd_line. */ | ||
166 | ptr = (char *)(unsigned long)cmd_line; | ||
167 | for (cmd_line_size = 0; ptr[cmd_line_size++]; ) | ||
168 | ; | ||
169 | mem_avoid[2].start = cmd_line; | ||
170 | mem_avoid[2].size = cmd_line_size; | ||
171 | |||
172 | /* Avoid heap memory. */ | ||
173 | mem_avoid[3].start = (unsigned long)free_mem_ptr; | ||
174 | mem_avoid[3].size = BOOT_HEAP_SIZE; | ||
175 | |||
176 | /* Avoid stack memory. */ | ||
177 | mem_avoid[4].start = (unsigned long)free_mem_end_ptr; | ||
178 | mem_avoid[4].size = BOOT_STACK_SIZE; | ||
179 | } | ||
180 | |||
181 | /* Does this memory vector overlap a known avoided area? */ | ||
182 | static bool mem_avoid_overlap(struct mem_vector *img) | ||
183 | { | ||
184 | int i; | ||
185 | struct setup_data *ptr; | ||
186 | |||
187 | for (i = 0; i < MEM_AVOID_MAX; i++) { | ||
188 | if (mem_overlaps(img, &mem_avoid[i])) | ||
189 | return true; | ||
190 | } | ||
191 | |||
192 | /* Avoid all entries in the setup_data linked list. */ | ||
193 | ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; | ||
194 | while (ptr) { | ||
195 | struct mem_vector avoid; | ||
196 | |||
197 | avoid.start = (unsigned long)ptr; | ||
198 | avoid.size = sizeof(*ptr) + ptr->len; | ||
199 | |||
200 | if (mem_overlaps(img, &avoid)) | ||
201 | return true; | ||
202 | |||
203 | ptr = (struct setup_data *)(unsigned long)ptr->next; | ||
204 | } | ||
205 | |||
206 | return false; | ||
207 | } | ||
208 | |||
209 | static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / | ||
210 | CONFIG_PHYSICAL_ALIGN]; | ||
211 | static unsigned long slot_max; | ||
212 | |||
213 | static void slots_append(unsigned long addr) | ||
214 | { | ||
215 | /* Overflowing the slots list should be impossible. */ | ||
216 | if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / | ||
217 | CONFIG_PHYSICAL_ALIGN) | ||
218 | return; | ||
219 | |||
220 | slots[slot_max++] = addr; | ||
221 | } | ||
222 | |||
223 | static unsigned long slots_fetch_random(void) | ||
224 | { | ||
225 | /* Handle case of no slots stored. */ | ||
226 | if (slot_max == 0) | ||
227 | return 0; | ||
228 | |||
229 | return slots[get_random_long() % slot_max]; | ||
230 | } | ||
231 | |||
232 | static void process_e820_entry(struct e820entry *entry, | ||
233 | unsigned long minimum, | ||
234 | unsigned long image_size) | ||
235 | { | ||
236 | struct mem_vector region, img; | ||
237 | |||
238 | /* Skip non-RAM entries. */ | ||
239 | if (entry->type != E820_RAM) | ||
240 | return; | ||
241 | |||
242 | /* Ignore entries entirely above our maximum. */ | ||
243 | if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) | ||
244 | return; | ||
245 | |||
246 | /* Ignore entries entirely below our minimum. */ | ||
247 | if (entry->addr + entry->size < minimum) | ||
248 | return; | ||
249 | |||
250 | region.start = entry->addr; | ||
251 | region.size = entry->size; | ||
252 | |||
253 | /* Potentially raise address to minimum location. */ | ||
254 | if (region.start < minimum) | ||
255 | region.start = minimum; | ||
256 | |||
257 | /* Potentially raise address to meet alignment requirements. */ | ||
258 | region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); | ||
259 | |||
260 | /* Did we raise the address above the bounds of this e820 region? */ | ||
261 | if (region.start > entry->addr + entry->size) | ||
262 | return; | ||
263 | |||
264 | /* Reduce size by any delta from the original address. */ | ||
265 | region.size -= region.start - entry->addr; | ||
266 | |||
267 | /* Reduce maximum size to fit end of image within maximum limit. */ | ||
268 | if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) | ||
269 | region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; | ||
270 | |||
271 | /* Walk each aligned slot and check for avoided areas. */ | ||
272 | for (img.start = region.start, img.size = image_size ; | ||
273 | mem_contains(®ion, &img) ; | ||
274 | img.start += CONFIG_PHYSICAL_ALIGN) { | ||
275 | if (mem_avoid_overlap(&img)) | ||
276 | continue; | ||
277 | slots_append(img.start); | ||
278 | } | ||
279 | } | ||
280 | |||
281 | static unsigned long find_random_addr(unsigned long minimum, | ||
282 | unsigned long size) | ||
283 | { | ||
284 | int i; | ||
285 | unsigned long addr; | ||
286 | |||
287 | /* Make sure minimum is aligned. */ | ||
288 | minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); | ||
289 | |||
290 | /* Verify potential e820 positions, appending to slots list. */ | ||
291 | for (i = 0; i < real_mode->e820_entries; i++) { | ||
292 | process_e820_entry(&real_mode->e820_map[i], minimum, size); | ||
293 | } | ||
294 | |||
295 | return slots_fetch_random(); | ||
296 | } | ||
297 | |||
298 | unsigned char *choose_kernel_location(struct boot_params *boot_params, | ||
299 | unsigned char *input, | ||
300 | unsigned long input_size, | ||
301 | unsigned char *output, | ||
302 | unsigned long output_size) | ||
303 | { | ||
304 | unsigned long choice = (unsigned long)output; | ||
305 | unsigned long random; | ||
306 | |||
307 | #ifdef CONFIG_HIBERNATION | ||
308 | if (!cmdline_find_option_bool("kaslr")) { | ||
309 | debug_putstr("KASLR disabled by default...\n"); | ||
310 | goto out; | ||
311 | } | ||
312 | #else | ||
313 | if (cmdline_find_option_bool("nokaslr")) { | ||
314 | debug_putstr("KASLR disabled by cmdline...\n"); | ||
315 | goto out; | ||
316 | } | ||
317 | #endif | ||
318 | |||
319 | boot_params->hdr.loadflags |= KASLR_FLAG; | ||
320 | |||
321 | /* Record the various known unsafe memory ranges. */ | ||
322 | mem_avoid_init((unsigned long)input, input_size, | ||
323 | (unsigned long)output, output_size); | ||
324 | |||
325 | /* Walk e820 and find a random address. */ | ||
326 | random = find_random_addr(choice, output_size); | ||
327 | if (!random) { | ||
328 | debug_putstr("KASLR could not find suitable E820 region...\n"); | ||
329 | goto out; | ||
330 | } | ||
331 | |||
332 | /* Always enforce the minimum. */ | ||
333 | if (random < choice) | ||
334 | goto out; | ||
335 | |||
336 | choice = random; | ||
337 | out: | ||
338 | return (unsigned char *)choice; | ||
339 | } | ||
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index b68e3033e6b9..73ccf63b0f48 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c | |||
@@ -15,9 +15,9 @@ static inline char rdfs8(addr_t addr) | |||
15 | #include "../cmdline.c" | 15 | #include "../cmdline.c" |
16 | static unsigned long get_cmd_line_ptr(void) | 16 | static unsigned long get_cmd_line_ptr(void) |
17 | { | 17 | { |
18 | unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr; | 18 | unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; |
19 | 19 | ||
20 | cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32; | 20 | cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; |
21 | 21 | ||
22 | return cmd_line_ptr; | 22 | return cmd_line_ptr; |
23 | } | 23 | } |
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c new file mode 100644 index 000000000000..6248740b68b5 --- /dev/null +++ b/arch/x86/boot/compressed/error.c | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Callers outside of misc.c need access to the error reporting routines, | ||
3 | * but the *_putstr() functions need to stay in misc.c because of how | ||
4 | * memcpy() and memmove() are defined for the compressed boot environment. | ||
5 | */ | ||
6 | #include "misc.h" | ||
7 | |||
8 | void warn(char *m) | ||
9 | { | ||
10 | error_putstr("\n\n"); | ||
11 | error_putstr(m); | ||
12 | error_putstr("\n\n"); | ||
13 | } | ||
14 | |||
15 | void error(char *m) | ||
16 | { | ||
17 | warn(m); | ||
18 | error_putstr(" -- System halted"); | ||
19 | |||
20 | while (1) | ||
21 | asm("hlt"); | ||
22 | } | ||
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h new file mode 100644 index 000000000000..2e59dac07f9e --- /dev/null +++ b/arch/x86/boot/compressed/error.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #ifndef BOOT_COMPRESSED_ERROR_H | ||
2 | #define BOOT_COMPRESSED_ERROR_H | ||
3 | |||
4 | void warn(char *m); | ||
5 | void error(char *m); | ||
6 | |||
7 | #endif /* BOOT_COMPRESSED_ERROR_H */ | ||
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 0256064da8da..1038524270e7 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S | |||
@@ -176,7 +176,9 @@ preferred_addr: | |||
176 | 1: | 176 | 1: |
177 | 177 | ||
178 | /* Target address to relocate to for decompression */ | 178 | /* Target address to relocate to for decompression */ |
179 | addl $z_extract_offset, %ebx | 179 | movl BP_init_size(%esi), %eax |
180 | subl $_end, %eax | ||
181 | addl %eax, %ebx | ||
180 | 182 | ||
181 | /* Set up the stack */ | 183 | /* Set up the stack */ |
182 | leal boot_stack_end(%ebx), %esp | 184 | leal boot_stack_end(%ebx), %esp |
@@ -233,24 +235,28 @@ relocated: | |||
233 | 2: | 235 | 2: |
234 | 236 | ||
235 | /* | 237 | /* |
236 | * Do the decompression, and jump to the new kernel.. | 238 | * Do the extraction, and jump to the new kernel.. |
237 | */ | 239 | */ |
238 | /* push arguments for decompress_kernel: */ | 240 | /* push arguments for extract_kernel: */ |
239 | pushl $z_run_size /* size of kernel with .bss and .brk */ | ||
240 | pushl $z_output_len /* decompressed length, end of relocs */ | 241 | pushl $z_output_len /* decompressed length, end of relocs */ |
241 | leal z_extract_offset_negative(%ebx), %ebp | 242 | |
243 | movl BP_init_size(%esi), %eax | ||
244 | subl $_end, %eax | ||
245 | movl %ebx, %ebp | ||
246 | subl %eax, %ebp | ||
242 | pushl %ebp /* output address */ | 247 | pushl %ebp /* output address */ |
248 | |||
243 | pushl $z_input_len /* input_len */ | 249 | pushl $z_input_len /* input_len */ |
244 | leal input_data(%ebx), %eax | 250 | leal input_data(%ebx), %eax |
245 | pushl %eax /* input_data */ | 251 | pushl %eax /* input_data */ |
246 | leal boot_heap(%ebx), %eax | 252 | leal boot_heap(%ebx), %eax |
247 | pushl %eax /* heap area */ | 253 | pushl %eax /* heap area */ |
248 | pushl %esi /* real mode pointer */ | 254 | pushl %esi /* real mode pointer */ |
249 | call decompress_kernel /* returns kernel location in %eax */ | 255 | call extract_kernel /* returns kernel location in %eax */ |
250 | addl $28, %esp | 256 | addl $24, %esp |
251 | 257 | ||
252 | /* | 258 | /* |
253 | * Jump to the decompressed kernel. | 259 | * Jump to the extracted kernel. |
254 | */ | 260 | */ |
255 | xorl %ebx, %ebx | 261 | xorl %ebx, %ebx |
256 | jmp *%eax | 262 | jmp *%eax |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 86558a199139..0d80a7ad65cd 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -110,7 +110,9 @@ ENTRY(startup_32) | |||
110 | 1: | 110 | 1: |
111 | 111 | ||
112 | /* Target address to relocate to for decompression */ | 112 | /* Target address to relocate to for decompression */ |
113 | addl $z_extract_offset, %ebx | 113 | movl BP_init_size(%esi), %eax |
114 | subl $_end, %eax | ||
115 | addl %eax, %ebx | ||
114 | 116 | ||
115 | /* | 117 | /* |
116 | * Prepare for entering 64 bit mode | 118 | * Prepare for entering 64 bit mode |
@@ -132,7 +134,7 @@ ENTRY(startup_32) | |||
132 | /* Initialize Page tables to 0 */ | 134 | /* Initialize Page tables to 0 */ |
133 | leal pgtable(%ebx), %edi | 135 | leal pgtable(%ebx), %edi |
134 | xorl %eax, %eax | 136 | xorl %eax, %eax |
135 | movl $((4096*6)/4), %ecx | 137 | movl $(BOOT_INIT_PGT_SIZE/4), %ecx |
136 | rep stosl | 138 | rep stosl |
137 | 139 | ||
138 | /* Build Level 4 */ | 140 | /* Build Level 4 */ |
@@ -338,7 +340,9 @@ preferred_addr: | |||
338 | 1: | 340 | 1: |
339 | 341 | ||
340 | /* Target address to relocate to for decompression */ | 342 | /* Target address to relocate to for decompression */ |
341 | leaq z_extract_offset(%rbp), %rbx | 343 | movl BP_init_size(%rsi), %ebx |
344 | subl $_end, %ebx | ||
345 | addq %rbp, %rbx | ||
342 | 346 | ||
343 | /* Set up the stack */ | 347 | /* Set up the stack */ |
344 | leaq boot_stack_end(%rbx), %rsp | 348 | leaq boot_stack_end(%rbx), %rsp |
@@ -408,19 +412,16 @@ relocated: | |||
408 | 2: | 412 | 2: |
409 | 413 | ||
410 | /* | 414 | /* |
411 | * Do the decompression, and jump to the new kernel.. | 415 | * Do the extraction, and jump to the new kernel.. |
412 | */ | 416 | */ |
413 | pushq %rsi /* Save the real mode argument */ | 417 | pushq %rsi /* Save the real mode argument */ |
414 | movq $z_run_size, %r9 /* size of kernel with .bss and .brk */ | ||
415 | pushq %r9 | ||
416 | movq %rsi, %rdi /* real mode address */ | 418 | movq %rsi, %rdi /* real mode address */ |
417 | leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ | 419 | leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ |
418 | leaq input_data(%rip), %rdx /* input_data */ | 420 | leaq input_data(%rip), %rdx /* input_data */ |
419 | movl $z_input_len, %ecx /* input_len */ | 421 | movl $z_input_len, %ecx /* input_len */ |
420 | movq %rbp, %r8 /* output target address */ | 422 | movq %rbp, %r8 /* output target address */ |
421 | movq $z_output_len, %r9 /* decompressed length, end of relocs */ | 423 | movq $z_output_len, %r9 /* decompressed length, end of relocs */ |
422 | call decompress_kernel /* returns kernel location in %rax */ | 424 | call extract_kernel /* returns kernel location in %rax */ |
423 | popq %r9 | ||
424 | popq %rsi | 425 | popq %rsi |
425 | 426 | ||
426 | /* | 427 | /* |
@@ -485,4 +486,4 @@ boot_stack_end: | |||
485 | .section ".pgtable","a",@nobits | 486 | .section ".pgtable","a",@nobits |
486 | .balign 4096 | 487 | .balign 4096 |
487 | pgtable: | 488 | pgtable: |
488 | .fill 6*4096, 1, 0 | 489 | .fill BOOT_PGT_SIZE, 1, 0 |
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c new file mode 100644 index 000000000000..cfeb0259ed81 --- /dev/null +++ b/arch/x86/boot/compressed/kaslr.c | |||
@@ -0,0 +1,510 @@ | |||
1 | /* | ||
2 | * kaslr.c | ||
3 | * | ||
4 | * This contains the routines needed to generate a reasonable level of | ||
5 | * entropy to choose a randomized kernel base address offset in support | ||
6 | * of Kernel Address Space Layout Randomization (KASLR). Additionally | ||
7 | * handles walking the physical memory maps (and tracking memory regions | ||
8 | * to avoid) in order to select a physical memory location that can | ||
9 | * contain the entire properly aligned running kernel image. | ||
10 | * | ||
11 | */ | ||
12 | #include "misc.h" | ||
13 | #include "error.h" | ||
14 | |||
15 | #include <asm/msr.h> | ||
16 | #include <asm/archrandom.h> | ||
17 | #include <asm/e820.h> | ||
18 | |||
19 | #include <generated/compile.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/uts.h> | ||
22 | #include <linux/utsname.h> | ||
23 | #include <generated/utsrelease.h> | ||
24 | |||
25 | /* Simplified build-specific string for starting entropy. */ | ||
26 | static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" | ||
27 | LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; | ||
28 | |||
29 | #define I8254_PORT_CONTROL 0x43 | ||
30 | #define I8254_PORT_COUNTER0 0x40 | ||
31 | #define I8254_CMD_READBACK 0xC0 | ||
32 | #define I8254_SELECT_COUNTER0 0x02 | ||
33 | #define I8254_STATUS_NOTREADY 0x40 | ||
34 | static inline u16 i8254(void) | ||
35 | { | ||
36 | u16 status, timer; | ||
37 | |||
38 | do { | ||
39 | outb(I8254_PORT_CONTROL, | ||
40 | I8254_CMD_READBACK | I8254_SELECT_COUNTER0); | ||
41 | status = inb(I8254_PORT_COUNTER0); | ||
42 | timer = inb(I8254_PORT_COUNTER0); | ||
43 | timer |= inb(I8254_PORT_COUNTER0) << 8; | ||
44 | } while (status & I8254_STATUS_NOTREADY); | ||
45 | |||
46 | return timer; | ||
47 | } | ||
48 | |||
49 | static unsigned long rotate_xor(unsigned long hash, const void *area, | ||
50 | size_t size) | ||
51 | { | ||
52 | size_t i; | ||
53 | unsigned long *ptr = (unsigned long *)area; | ||
54 | |||
55 | for (i = 0; i < size / sizeof(hash); i++) { | ||
56 | /* Rotate by odd number of bits and XOR. */ | ||
57 | hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); | ||
58 | hash ^= ptr[i]; | ||
59 | } | ||
60 | |||
61 | return hash; | ||
62 | } | ||
63 | |||
64 | /* Attempt to create a simple but unpredictable starting entropy. */ | ||
65 | static unsigned long get_random_boot(void) | ||
66 | { | ||
67 | unsigned long hash = 0; | ||
68 | |||
69 | hash = rotate_xor(hash, build_str, sizeof(build_str)); | ||
70 | hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); | ||
71 | |||
72 | return hash; | ||
73 | } | ||
74 | |||
75 | static unsigned long get_random_long(const char *purpose) | ||
76 | { | ||
77 | #ifdef CONFIG_X86_64 | ||
78 | const unsigned long mix_const = 0x5d6008cbf3848dd3UL; | ||
79 | #else | ||
80 | const unsigned long mix_const = 0x3f39e593UL; | ||
81 | #endif | ||
82 | unsigned long raw, random = get_random_boot(); | ||
83 | bool use_i8254 = true; | ||
84 | |||
85 | debug_putstr(purpose); | ||
86 | debug_putstr(" KASLR using"); | ||
87 | |||
88 | if (has_cpuflag(X86_FEATURE_RDRAND)) { | ||
89 | debug_putstr(" RDRAND"); | ||
90 | if (rdrand_long(&raw)) { | ||
91 | random ^= raw; | ||
92 | use_i8254 = false; | ||
93 | } | ||
94 | } | ||
95 | |||
96 | if (has_cpuflag(X86_FEATURE_TSC)) { | ||
97 | debug_putstr(" RDTSC"); | ||
98 | raw = rdtsc(); | ||
99 | |||
100 | random ^= raw; | ||
101 | use_i8254 = false; | ||
102 | } | ||
103 | |||
104 | if (use_i8254) { | ||
105 | debug_putstr(" i8254"); | ||
106 | random ^= i8254(); | ||
107 | } | ||
108 | |||
109 | /* Circular multiply for better bit diffusion */ | ||
110 | asm("mul %3" | ||
111 | : "=a" (random), "=d" (raw) | ||
112 | : "a" (random), "rm" (mix_const)); | ||
113 | random += raw; | ||
114 | |||
115 | debug_putstr("...\n"); | ||
116 | |||
117 | return random; | ||
118 | } | ||
119 | |||
120 | struct mem_vector { | ||
121 | unsigned long start; | ||
122 | unsigned long size; | ||
123 | }; | ||
124 | |||
125 | enum mem_avoid_index { | ||
126 | MEM_AVOID_ZO_RANGE = 0, | ||
127 | MEM_AVOID_INITRD, | ||
128 | MEM_AVOID_CMDLINE, | ||
129 | MEM_AVOID_BOOTPARAMS, | ||
130 | MEM_AVOID_MAX, | ||
131 | }; | ||
132 | |||
133 | static struct mem_vector mem_avoid[MEM_AVOID_MAX]; | ||
134 | |||
135 | static bool mem_contains(struct mem_vector *region, struct mem_vector *item) | ||
136 | { | ||
137 | /* Item at least partially before region. */ | ||
138 | if (item->start < region->start) | ||
139 | return false; | ||
140 | /* Item at least partially after region. */ | ||
141 | if (item->start + item->size > region->start + region->size) | ||
142 | return false; | ||
143 | return true; | ||
144 | } | ||
145 | |||
146 | static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) | ||
147 | { | ||
148 | /* Item one is entirely before item two. */ | ||
149 | if (one->start + one->size <= two->start) | ||
150 | return false; | ||
151 | /* Item one is entirely after item two. */ | ||
152 | if (one->start >= two->start + two->size) | ||
153 | return false; | ||
154 | return true; | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). | ||
159 | * The mem_avoid array is used to store the ranges that need to be avoided | ||
160 | * when KASLR searches for an appropriate random address. We must avoid any | ||
161 | * regions that are unsafe to overlap with during decompression, and other | ||
162 | * things like the initrd, cmdline and boot_params. This comment seeks to | ||
163 | * explain mem_avoid as clearly as possible since incorrect mem_avoid | ||
164 | * memory ranges lead to really hard to debug boot failures. | ||
165 | * | ||
166 | * The initrd, cmdline, and boot_params are trivial to identify for | ||
167 | * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and | ||
168 | * MEM_AVOID_BOOTPARAMS respectively below. | ||
169 | * | ||
170 | * What is not obvious how to avoid is the range of memory that is used | ||
171 | * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover | ||
172 | * the compressed kernel (ZO) and its run space, which is used to extract | ||
173 | * the uncompressed kernel (VO) and relocs. | ||
174 | * | ||
175 | * ZO's full run size sits against the end of the decompression buffer, so | ||
176 | * we can calculate where text, data, bss, etc of ZO are positioned more | ||
177 | * easily. | ||
178 | * | ||
179 | * For additional background, the decompression calculations can be found | ||
180 | * in header.S, and the memory diagram is based on the one found in misc.c. | ||
181 | * | ||
182 | * The following conditions are already enforced by the image layouts and | ||
183 | * associated code: | ||
184 | * - input + input_size >= output + output_size | ||
185 | * - kernel_total_size <= init_size | ||
186 | * - kernel_total_size <= output_size (see Note below) | ||
187 | * - output + init_size >= output + output_size | ||
188 | * | ||
189 | * (Note that kernel_total_size and output_size have no fundamental | ||
190 | * relationship, but output_size is passed to choose_random_location | ||
191 | * as a maximum of the two. The diagram is showing a case where | ||
192 | * kernel_total_size is larger than output_size, but this case is | ||
193 | * handled by bumping output_size.) | ||
194 | * | ||
195 | * The above conditions can be illustrated by a diagram: | ||
196 | * | ||
197 | * 0 output input input+input_size output+init_size | ||
198 | * | | | | | | ||
199 | * | | | | | | ||
200 | * |-----|--------|--------|--------------|-----------|--|-------------| | ||
201 | * | | | | ||
202 | * | | | | ||
203 | * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size | ||
204 | * | ||
205 | * [output, output+init_size) is the entire memory range used for | ||
206 | * extracting the compressed image. | ||
207 | * | ||
208 | * [output, output+kernel_total_size) is the range needed for the | ||
209 | * uncompressed kernel (VO) and its run size (bss, brk, etc). | ||
210 | * | ||
211 | * [output, output+output_size) is VO plus relocs (i.e. the entire | ||
212 | * uncompressed payload contained by ZO). This is the area of the buffer | ||
213 | * written to during decompression. | ||
214 | * | ||
215 | * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case | ||
216 | * range of the copied ZO and decompression code. (i.e. the range | ||
217 | * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.) | ||
218 | * | ||
219 | * [input, input+input_size) is the original copied compressed image (ZO) | ||
220 | * (i.e. it does not include its run size). This range must be avoided | ||
221 | * because it contains the data used for decompression. | ||
222 | * | ||
223 | * [input+input_size, output+init_size) is [_text, _end) for ZO. This | ||
224 | * range includes ZO's heap and stack, and must be avoided since it | ||
225 | * performs the decompression. | ||
226 | * | ||
227 | * Since the above two ranges need to be avoided and they are adjacent, | ||
228 | * they can be merged, resulting in: [input, output+init_size) which | ||
229 | * becomes the MEM_AVOID_ZO_RANGE below. | ||
230 | */ | ||
231 | static void mem_avoid_init(unsigned long input, unsigned long input_size, | ||
232 | unsigned long output) | ||
233 | { | ||
234 | unsigned long init_size = boot_params->hdr.init_size; | ||
235 | u64 initrd_start, initrd_size; | ||
236 | u64 cmd_line, cmd_line_size; | ||
237 | char *ptr; | ||
238 | |||
239 | /* | ||
240 | * Avoid the region that is unsafe to overlap during | ||
241 | * decompression. | ||
242 | */ | ||
243 | mem_avoid[MEM_AVOID_ZO_RANGE].start = input; | ||
244 | mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; | ||
245 | add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start, | ||
246 | mem_avoid[MEM_AVOID_ZO_RANGE].size); | ||
247 | |||
248 | /* Avoid initrd. */ | ||
249 | initrd_start = (u64)boot_params->ext_ramdisk_image << 32; | ||
250 | initrd_start |= boot_params->hdr.ramdisk_image; | ||
251 | initrd_size = (u64)boot_params->ext_ramdisk_size << 32; | ||
252 | initrd_size |= boot_params->hdr.ramdisk_size; | ||
253 | mem_avoid[MEM_AVOID_INITRD].start = initrd_start; | ||
254 | mem_avoid[MEM_AVOID_INITRD].size = initrd_size; | ||
255 | /* No need to set mapping for initrd, it will be handled in VO. */ | ||
256 | |||
257 | /* Avoid kernel command line. */ | ||
258 | cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; | ||
259 | cmd_line |= boot_params->hdr.cmd_line_ptr; | ||
260 | /* Calculate size of cmd_line. */ | ||
261 | ptr = (char *)(unsigned long)cmd_line; | ||
262 | for (cmd_line_size = 0; ptr[cmd_line_size++]; ) | ||
263 | ; | ||
264 | mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; | ||
265 | mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; | ||
266 | add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start, | ||
267 | mem_avoid[MEM_AVOID_CMDLINE].size); | ||
268 | |||
269 | /* Avoid boot parameters. */ | ||
270 | mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; | ||
271 | mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); | ||
272 | add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start, | ||
273 | mem_avoid[MEM_AVOID_BOOTPARAMS].size); | ||
274 | |||
275 | /* We don't need to set a mapping for setup_data. */ | ||
276 | |||
277 | #ifdef CONFIG_X86_VERBOSE_BOOTUP | ||
278 | /* Make sure video RAM can be used. */ | ||
279 | add_identity_map(0, PMD_SIZE); | ||
280 | #endif | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Does this memory vector overlap a known avoided area? If so, record the | ||
285 | * overlap region with the lowest address. | ||
286 | */ | ||
287 | static bool mem_avoid_overlap(struct mem_vector *img, | ||
288 | struct mem_vector *overlap) | ||
289 | { | ||
290 | int i; | ||
291 | struct setup_data *ptr; | ||
292 | unsigned long earliest = img->start + img->size; | ||
293 | bool is_overlapping = false; | ||
294 | |||
295 | for (i = 0; i < MEM_AVOID_MAX; i++) { | ||
296 | if (mem_overlaps(img, &mem_avoid[i]) && | ||
297 | mem_avoid[i].start < earliest) { | ||
298 | *overlap = mem_avoid[i]; | ||
299 | is_overlapping = true; | ||
300 | } | ||
301 | } | ||
302 | |||
303 | /* Avoid all entries in the setup_data linked list. */ | ||
304 | ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; | ||
305 | while (ptr) { | ||
306 | struct mem_vector avoid; | ||
307 | |||
308 | avoid.start = (unsigned long)ptr; | ||
309 | avoid.size = sizeof(*ptr) + ptr->len; | ||
310 | |||
311 | if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { | ||
312 | *overlap = avoid; | ||
313 | is_overlapping = true; | ||
314 | } | ||
315 | |||
316 | ptr = (struct setup_data *)(unsigned long)ptr->next; | ||
317 | } | ||
318 | |||
319 | return is_overlapping; | ||
320 | } | ||
321 | |||
322 | static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; | ||
323 | |||
324 | struct slot_area { | ||
325 | unsigned long addr; | ||
326 | int num; | ||
327 | }; | ||
328 | |||
329 | #define MAX_SLOT_AREA 100 | ||
330 | |||
331 | static struct slot_area slot_areas[MAX_SLOT_AREA]; | ||
332 | |||
333 | static unsigned long slot_max; | ||
334 | |||
335 | static unsigned long slot_area_index; | ||
336 | |||
337 | static void store_slot_info(struct mem_vector *region, unsigned long image_size) | ||
338 | { | ||
339 | struct slot_area slot_area; | ||
340 | |||
341 | if (slot_area_index == MAX_SLOT_AREA) | ||
342 | return; | ||
343 | |||
344 | slot_area.addr = region->start; | ||
345 | slot_area.num = (region->size - image_size) / | ||
346 | CONFIG_PHYSICAL_ALIGN + 1; | ||
347 | |||
348 | if (slot_area.num > 0) { | ||
349 | slot_areas[slot_area_index++] = slot_area; | ||
350 | slot_max += slot_area.num; | ||
351 | } | ||
352 | } | ||
353 | |||
354 | static void slots_append(unsigned long addr) | ||
355 | { | ||
356 | /* Overflowing the slots list should be impossible. */ | ||
357 | if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN) | ||
358 | return; | ||
359 | |||
360 | slots[slot_max++] = addr; | ||
361 | } | ||
362 | |||
363 | static unsigned long slots_fetch_random(void) | ||
364 | { | ||
365 | /* Handle case of no slots stored. */ | ||
366 | if (slot_max == 0) | ||
367 | return 0; | ||
368 | |||
369 | return slots[get_random_long("Physical") % slot_max]; | ||
370 | } | ||
371 | |||
372 | static void process_e820_entry(struct e820entry *entry, | ||
373 | unsigned long minimum, | ||
374 | unsigned long image_size) | ||
375 | { | ||
376 | struct mem_vector region, img, overlap; | ||
377 | |||
378 | /* Skip non-RAM entries. */ | ||
379 | if (entry->type != E820_RAM) | ||
380 | return; | ||
381 | |||
382 | /* Ignore entries entirely above our maximum. */ | ||
383 | if (entry->addr >= KERNEL_IMAGE_SIZE) | ||
384 | return; | ||
385 | |||
386 | /* Ignore entries entirely below our minimum. */ | ||
387 | if (entry->addr + entry->size < minimum) | ||
388 | return; | ||
389 | |||
390 | region.start = entry->addr; | ||
391 | region.size = entry->size; | ||
392 | |||
393 | /* Potentially raise address to minimum location. */ | ||
394 | if (region.start < minimum) | ||
395 | region.start = minimum; | ||
396 | |||
397 | /* Potentially raise address to meet alignment requirements. */ | ||
398 | region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); | ||
399 | |||
400 | /* Did we raise the address above the bounds of this e820 region? */ | ||
401 | if (region.start > entry->addr + entry->size) | ||
402 | return; | ||
403 | |||
404 | /* Reduce size by any delta from the original address. */ | ||
405 | region.size -= region.start - entry->addr; | ||
406 | |||
407 | /* Reduce maximum size to fit end of image within maximum limit. */ | ||
408 | if (region.start + region.size > KERNEL_IMAGE_SIZE) | ||
409 | region.size = KERNEL_IMAGE_SIZE - region.start; | ||
410 | |||
411 | /* Walk each aligned slot and check for avoided areas. */ | ||
412 | for (img.start = region.start, img.size = image_size ; | ||
413 | mem_contains(®ion, &img) ; | ||
414 | img.start += CONFIG_PHYSICAL_ALIGN) { | ||
415 | if (mem_avoid_overlap(&img, &overlap)) | ||
416 | continue; | ||
417 | slots_append(img.start); | ||
418 | } | ||
419 | } | ||
420 | |||
421 | static unsigned long find_random_phys_addr(unsigned long minimum, | ||
422 | unsigned long image_size) | ||
423 | { | ||
424 | int i; | ||
425 | unsigned long addr; | ||
426 | |||
427 | /* Make sure minimum is aligned. */ | ||
428 | minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); | ||
429 | |||
430 | /* Verify potential e820 positions, appending to slots list. */ | ||
431 | for (i = 0; i < boot_params->e820_entries; i++) { | ||
432 | process_e820_entry(&boot_params->e820_map[i], minimum, | ||
433 | image_size); | ||
434 | } | ||
435 | |||
436 | return slots_fetch_random(); | ||
437 | } | ||
438 | |||
439 | static unsigned long find_random_virt_addr(unsigned long minimum, | ||
440 | unsigned long image_size) | ||
441 | { | ||
442 | unsigned long slots, random_addr; | ||
443 | |||
444 | /* Make sure minimum is aligned. */ | ||
445 | minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); | ||
446 | /* Align image_size for easy slot calculations. */ | ||
447 | image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN); | ||
448 | |||
449 | /* | ||
450 | * There are how many CONFIG_PHYSICAL_ALIGN-sized slots | ||
451 | * that can hold image_size within the range of minimum to | ||
452 | * KERNEL_IMAGE_SIZE? | ||
453 | */ | ||
454 | slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / | ||
455 | CONFIG_PHYSICAL_ALIGN + 1; | ||
456 | |||
457 | random_addr = get_random_long("Virtual") % slots; | ||
458 | |||
459 | return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; | ||
460 | } | ||
461 | |||
462 | /* | ||
463 | * Since this function examines addresses much more numerically, | ||
464 | * it takes the input and output pointers as 'unsigned long'. | ||
465 | */ | ||
466 | unsigned char *choose_random_location(unsigned long input, | ||
467 | unsigned long input_size, | ||
468 | unsigned long output, | ||
469 | unsigned long output_size) | ||
470 | { | ||
471 | unsigned long choice = output; | ||
472 | unsigned long random_addr; | ||
473 | |||
474 | #ifdef CONFIG_HIBERNATION | ||
475 | if (!cmdline_find_option_bool("kaslr")) { | ||
476 | warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected)."); | ||
477 | goto out; | ||
478 | } | ||
479 | #else | ||
480 | if (cmdline_find_option_bool("nokaslr")) { | ||
481 | warn("KASLR disabled: 'nokaslr' on cmdline."); | ||
482 | goto out; | ||
483 | } | ||
484 | #endif | ||
485 | |||
486 | boot_params->hdr.loadflags |= KASLR_FLAG; | ||
487 | |||
488 | /* Record the various known unsafe memory ranges. */ | ||
489 | mem_avoid_init(input, input_size, output); | ||
490 | |||
491 | /* Walk e820 and find a random address. */ | ||
492 | random_addr = find_random_phys_addr(output, output_size); | ||
493 | if (!random_addr) { | ||
494 | warn("KASLR disabled: could not find suitable E820 region!"); | ||
495 | goto out; | ||
496 | } | ||
497 | |||
498 | /* Always enforce the minimum. */ | ||
499 | if (random_addr < choice) | ||
500 | goto out; | ||
501 | |||
502 | choice = random_addr; | ||
503 | |||
504 | add_identity_map(choice, output_size); | ||
505 | |||
506 | /* This actually loads the identity pagetable on x86_64. */ | ||
507 | finalize_identity_maps(); | ||
508 | out: | ||
509 | return (unsigned char *)choice; | ||
510 | } | ||
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 79dac1758e7c..f14db4e21654 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -1,8 +1,10 @@ | |||
1 | /* | 1 | /* |
2 | * misc.c | 2 | * misc.c |
3 | * | 3 | * |
4 | * This is a collection of several routines from gzip-1.0.3 | 4 | * This is a collection of several routines used to extract the kernel |
5 | * adapted for Linux. | 5 | * which includes KASLR relocation, decompression, ELF parsing, and |
6 | * relocation processing. Additionally included are the screen and serial | ||
7 | * output functions and related debugging support functions. | ||
6 | * | 8 | * |
7 | * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 | 9 | * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 |
8 | * puts by Nick Holloway 1993, better puts by Martin Mares 1995 | 10 | * puts by Nick Holloway 1993, better puts by Martin Mares 1995 |
@@ -10,111 +12,37 @@ | |||
10 | */ | 12 | */ |
11 | 13 | ||
12 | #include "misc.h" | 14 | #include "misc.h" |
15 | #include "error.h" | ||
13 | #include "../string.h" | 16 | #include "../string.h" |
14 | 17 | #include "../voffset.h" | |
15 | /* WARNING!! | ||
16 | * This code is compiled with -fPIC and it is relocated dynamically | ||
17 | * at run time, but no relocation processing is performed. | ||
18 | * This means that it is not safe to place pointers in static structures. | ||
19 | */ | ||
20 | 18 | ||
21 | /* | 19 | /* |
22 | * Getting to provable safe in place decompression is hard. | 20 | * WARNING!! |
23 | * Worst case behaviours need to be analyzed. | 21 | * This code is compiled with -fPIC and it is relocated dynamically at |
24 | * Background information: | 22 | * run time, but no relocation processing is performed. This means that |
25 | * | 23 | * it is not safe to place pointers in static structures. |
26 | * The file layout is: | ||
27 | * magic[2] | ||
28 | * method[1] | ||
29 | * flags[1] | ||
30 | * timestamp[4] | ||
31 | * extraflags[1] | ||
32 | * os[1] | ||
33 | * compressed data blocks[N] | ||
34 | * crc[4] orig_len[4] | ||
35 | * | ||
36 | * resulting in 18 bytes of non compressed data overhead. | ||
37 | * | ||
38 | * Files divided into blocks | ||
39 | * 1 bit (last block flag) | ||
40 | * 2 bits (block type) | ||
41 | * | ||
42 | * 1 block occurs every 32K -1 bytes or when there 50% compression | ||
43 | * has been achieved. The smallest block type encoding is always used. | ||
44 | * | ||
45 | * stored: | ||
46 | * 32 bits length in bytes. | ||
47 | * | ||
48 | * fixed: | ||
49 | * magic fixed tree. | ||
50 | * symbols. | ||
51 | * | ||
52 | * dynamic: | ||
53 | * dynamic tree encoding. | ||
54 | * symbols. | ||
55 | * | ||
56 | * | ||
57 | * The buffer for decompression in place is the length of the | ||
58 | * uncompressed data, plus a small amount extra to keep the algorithm safe. | ||
59 | * The compressed data is placed at the end of the buffer. The output | ||
60 | * pointer is placed at the start of the buffer and the input pointer | ||
61 | * is placed where the compressed data starts. Problems will occur | ||
62 | * when the output pointer overruns the input pointer. | ||
63 | * | ||
64 | * The output pointer can only overrun the input pointer if the input | ||
65 | * pointer is moving faster than the output pointer. A condition only | ||
66 | * triggered by data whose compressed form is larger than the uncompressed | ||
67 | * form. | ||
68 | * | ||
69 | * The worst case at the block level is a growth of the compressed data | ||
70 | * of 5 bytes per 32767 bytes. | ||
71 | * | ||
72 | * The worst case internal to a compressed block is very hard to figure. | ||
73 | * The worst case can at least be boundined by having one bit that represents | ||
74 | * 32764 bytes and then all of the rest of the bytes representing the very | ||
75 | * very last byte. | ||
76 | * | ||
77 | * All of which is enough to compute an amount of extra data that is required | ||
78 | * to be safe. To avoid problems at the block level allocating 5 extra bytes | ||
79 | * per 32767 bytes of data is sufficient. To avoind problems internal to a | ||
80 | * block adding an extra 32767 bytes (the worst case uncompressed block size) | ||
81 | * is sufficient, to ensure that in the worst case the decompressed data for | ||
82 | * block will stop the byte before the compressed data for a block begins. | ||
83 | * To avoid problems with the compressed data's meta information an extra 18 | ||
84 | * bytes are needed. Leading to the formula: | ||
85 | * | ||
86 | * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. | ||
87 | * | ||
88 | * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. | ||
89 | * Adding 32768 instead of 32767 just makes for round numbers. | ||
90 | * Adding the decompressor_size is necessary as it musht live after all | ||
91 | * of the data as well. Last I measured the decompressor is about 14K. | ||
92 | * 10K of actual data and 4K of bss. | ||
93 | * | ||
94 | */ | 24 | */ |
95 | 25 | ||
96 | /* | 26 | /* Macros used by the included decompressor code below. */ |
97 | * gzip declarations | ||
98 | */ | ||
99 | #define STATIC static | 27 | #define STATIC static |
100 | 28 | ||
101 | #undef memcpy | ||
102 | |||
103 | /* | 29 | /* |
104 | * Use a normal definition of memset() from string.c. There are already | 30 | * Use normal definitions of mem*() from string.c. There are already |
105 | * included header files which expect a definition of memset() and by | 31 | * included header files which expect a definition of memset() and by |
106 | * the time we define memset macro, it is too late. | 32 | * the time we define memset macro, it is too late. |
107 | */ | 33 | */ |
34 | #undef memcpy | ||
108 | #undef memset | 35 | #undef memset |
109 | #define memzero(s, n) memset((s), 0, (n)) | 36 | #define memzero(s, n) memset((s), 0, (n)) |
37 | #define memmove memmove | ||
110 | 38 | ||
111 | 39 | /* Functions used by the included decompressor code below. */ | |
112 | static void error(char *m); | 40 | void *memmove(void *dest, const void *src, size_t n); |
113 | 41 | ||
114 | /* | 42 | /* |
115 | * This is set up by the setup-routine at boot-time | 43 | * This is set up by the setup-routine at boot-time |
116 | */ | 44 | */ |
117 | struct boot_params *real_mode; /* Pointer to real-mode data */ | 45 | struct boot_params *boot_params; |
118 | 46 | ||
119 | memptr free_mem_ptr; | 47 | memptr free_mem_ptr; |
120 | memptr free_mem_end_ptr; | 48 | memptr free_mem_end_ptr; |
@@ -146,12 +74,16 @@ static int lines, cols; | |||
146 | #ifdef CONFIG_KERNEL_LZ4 | 74 | #ifdef CONFIG_KERNEL_LZ4 |
147 | #include "../../../../lib/decompress_unlz4.c" | 75 | #include "../../../../lib/decompress_unlz4.c" |
148 | #endif | 76 | #endif |
77 | /* | ||
78 | * NOTE: When adding a new decompressor, please update the analysis in | ||
79 | * ../header.S. | ||
80 | */ | ||
149 | 81 | ||
150 | static void scroll(void) | 82 | static void scroll(void) |
151 | { | 83 | { |
152 | int i; | 84 | int i; |
153 | 85 | ||
154 | memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); | 86 | memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); |
155 | for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) | 87 | for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) |
156 | vidmem[i] = ' '; | 88 | vidmem[i] = ' '; |
157 | } | 89 | } |
@@ -184,12 +116,12 @@ void __putstr(const char *s) | |||
184 | } | 116 | } |
185 | } | 117 | } |
186 | 118 | ||
187 | if (real_mode->screen_info.orig_video_mode == 0 && | 119 | if (boot_params->screen_info.orig_video_mode == 0 && |
188 | lines == 0 && cols == 0) | 120 | lines == 0 && cols == 0) |
189 | return; | 121 | return; |
190 | 122 | ||
191 | x = real_mode->screen_info.orig_x; | 123 | x = boot_params->screen_info.orig_x; |
192 | y = real_mode->screen_info.orig_y; | 124 | y = boot_params->screen_info.orig_y; |
193 | 125 | ||
194 | while ((c = *s++) != '\0') { | 126 | while ((c = *s++) != '\0') { |
195 | if (c == '\n') { | 127 | if (c == '\n') { |
@@ -210,8 +142,8 @@ void __putstr(const char *s) | |||
210 | } | 142 | } |
211 | } | 143 | } |
212 | 144 | ||
213 | real_mode->screen_info.orig_x = x; | 145 | boot_params->screen_info.orig_x = x; |
214 | real_mode->screen_info.orig_y = y; | 146 | boot_params->screen_info.orig_y = y; |
215 | 147 | ||
216 | pos = (x + cols * y) * 2; /* Update cursor position */ | 148 | pos = (x + cols * y) * 2; /* Update cursor position */ |
217 | outb(14, vidport); | 149 | outb(14, vidport); |
@@ -237,23 +169,13 @@ void __puthex(unsigned long value) | |||
237 | } | 169 | } |
238 | } | 170 | } |
239 | 171 | ||
240 | static void error(char *x) | ||
241 | { | ||
242 | error_putstr("\n\n"); | ||
243 | error_putstr(x); | ||
244 | error_putstr("\n\n -- System halted"); | ||
245 | |||
246 | while (1) | ||
247 | asm("hlt"); | ||
248 | } | ||
249 | |||
250 | #if CONFIG_X86_NEED_RELOCS | 172 | #if CONFIG_X86_NEED_RELOCS |
251 | static void handle_relocations(void *output, unsigned long output_len) | 173 | static void handle_relocations(void *output, unsigned long output_len) |
252 | { | 174 | { |
253 | int *reloc; | 175 | int *reloc; |
254 | unsigned long delta, map, ptr; | 176 | unsigned long delta, map, ptr; |
255 | unsigned long min_addr = (unsigned long)output; | 177 | unsigned long min_addr = (unsigned long)output; |
256 | unsigned long max_addr = min_addr + output_len; | 178 | unsigned long max_addr = min_addr + (VO___bss_start - VO__text); |
257 | 179 | ||
258 | /* | 180 | /* |
259 | * Calculate the delta between where vmlinux was linked to load | 181 | * Calculate the delta between where vmlinux was linked to load |
@@ -295,7 +217,7 @@ static void handle_relocations(void *output, unsigned long output_len) | |||
295 | * So we work backwards from the end of the decompressed image. | 217 | * So we work backwards from the end of the decompressed image. |
296 | */ | 218 | */ |
297 | for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { | 219 | for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { |
298 | int extended = *reloc; | 220 | long extended = *reloc; |
299 | extended += map; | 221 | extended += map; |
300 | 222 | ||
301 | ptr = (unsigned long)extended; | 223 | ptr = (unsigned long)extended; |
@@ -372,9 +294,7 @@ static void parse_elf(void *output) | |||
372 | #else | 294 | #else |
373 | dest = (void *)(phdr->p_paddr); | 295 | dest = (void *)(phdr->p_paddr); |
374 | #endif | 296 | #endif |
375 | memcpy(dest, | 297 | memmove(dest, output + phdr->p_offset, phdr->p_filesz); |
376 | output + phdr->p_offset, | ||
377 | phdr->p_filesz); | ||
378 | break; | 298 | break; |
379 | default: /* Ignore other PT_* */ break; | 299 | default: /* Ignore other PT_* */ break; |
380 | } | 300 | } |
@@ -383,23 +303,41 @@ static void parse_elf(void *output) | |||
383 | free(phdrs); | 303 | free(phdrs); |
384 | } | 304 | } |
385 | 305 | ||
386 | asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, | 306 | /* |
307 | * The compressed kernel image (ZO), has been moved so that its position | ||
308 | * is against the end of the buffer used to hold the uncompressed kernel | ||
309 | * image (VO) and the execution environment (.bss, .brk), which makes sure | ||
310 | * there is room to do the in-place decompression. (See header.S for the | ||
311 | * calculations.) | ||
312 | * | ||
313 | * |-----compressed kernel image------| | ||
314 | * V V | ||
315 | * 0 extract_offset +INIT_SIZE | ||
316 | * |-----------|---------------|-------------------------|--------| | ||
317 | * | | | | | ||
318 | * VO__text startup_32 of ZO VO__end ZO__end | ||
319 | * ^ ^ | ||
320 | * |-------uncompressed kernel image---------| | ||
321 | * | ||
322 | */ | ||
323 | asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, | ||
387 | unsigned char *input_data, | 324 | unsigned char *input_data, |
388 | unsigned long input_len, | 325 | unsigned long input_len, |
389 | unsigned char *output, | 326 | unsigned char *output, |
390 | unsigned long output_len, | 327 | unsigned long output_len) |
391 | unsigned long run_size) | ||
392 | { | 328 | { |
329 | const unsigned long kernel_total_size = VO__end - VO__text; | ||
393 | unsigned char *output_orig = output; | 330 | unsigned char *output_orig = output; |
394 | 331 | ||
395 | real_mode = rmode; | 332 | /* Retain x86 boot parameters pointer passed from startup_32/64. */ |
333 | boot_params = rmode; | ||
396 | 334 | ||
397 | /* Clear it for solely in-kernel use */ | 335 | /* Clear flags intended for solely in-kernel use. */ |
398 | real_mode->hdr.loadflags &= ~KASLR_FLAG; | 336 | boot_params->hdr.loadflags &= ~KASLR_FLAG; |
399 | 337 | ||
400 | sanitize_boot_params(real_mode); | 338 | sanitize_boot_params(boot_params); |
401 | 339 | ||
402 | if (real_mode->screen_info.orig_video_mode == 7) { | 340 | if (boot_params->screen_info.orig_video_mode == 7) { |
403 | vidmem = (char *) 0xb0000; | 341 | vidmem = (char *) 0xb0000; |
404 | vidport = 0x3b4; | 342 | vidport = 0x3b4; |
405 | } else { | 343 | } else { |
@@ -407,11 +345,11 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, | |||
407 | vidport = 0x3d4; | 345 | vidport = 0x3d4; |
408 | } | 346 | } |
409 | 347 | ||
410 | lines = real_mode->screen_info.orig_video_lines; | 348 | lines = boot_params->screen_info.orig_video_lines; |
411 | cols = real_mode->screen_info.orig_video_cols; | 349 | cols = boot_params->screen_info.orig_video_cols; |
412 | 350 | ||
413 | console_init(); | 351 | console_init(); |
414 | debug_putstr("early console in decompress_kernel\n"); | 352 | debug_putstr("early console in extract_kernel\n"); |
415 | 353 | ||
416 | free_mem_ptr = heap; /* Heap */ | 354 | free_mem_ptr = heap; /* Heap */ |
417 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; | 355 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; |
@@ -421,16 +359,16 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, | |||
421 | debug_putaddr(input_len); | 359 | debug_putaddr(input_len); |
422 | debug_putaddr(output); | 360 | debug_putaddr(output); |
423 | debug_putaddr(output_len); | 361 | debug_putaddr(output_len); |
424 | debug_putaddr(run_size); | 362 | debug_putaddr(kernel_total_size); |
425 | 363 | ||
426 | /* | 364 | /* |
427 | * The memory hole needed for the kernel is the larger of either | 365 | * The memory hole needed for the kernel is the larger of either |
428 | * the entire decompressed kernel plus relocation table, or the | 366 | * the entire decompressed kernel plus relocation table, or the |
429 | * entire decompressed kernel plus .bss and .brk sections. | 367 | * entire decompressed kernel plus .bss and .brk sections. |
430 | */ | 368 | */ |
431 | output = choose_kernel_location(real_mode, input_data, input_len, output, | 369 | output = choose_random_location((unsigned long)input_data, input_len, |
432 | output_len > run_size ? output_len | 370 | (unsigned long)output, |
433 | : run_size); | 371 | max(output_len, kernel_total_size)); |
434 | 372 | ||
435 | /* Validate memory location choices. */ | 373 | /* Validate memory location choices. */ |
436 | if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) | 374 | if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) |
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 3783dc3e10b3..b6fec1ff10e4 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h | |||
@@ -32,7 +32,7 @@ | |||
32 | /* misc.c */ | 32 | /* misc.c */ |
33 | extern memptr free_mem_ptr; | 33 | extern memptr free_mem_ptr; |
34 | extern memptr free_mem_end_ptr; | 34 | extern memptr free_mem_end_ptr; |
35 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ | 35 | extern struct boot_params *boot_params; |
36 | void __putstr(const char *s); | 36 | void __putstr(const char *s); |
37 | void __puthex(unsigned long value); | 37 | void __puthex(unsigned long value); |
38 | #define error_putstr(__x) __putstr(__x) | 38 | #define error_putstr(__x) __putstr(__x) |
@@ -66,26 +66,35 @@ int cmdline_find_option_bool(const char *option); | |||
66 | 66 | ||
67 | 67 | ||
68 | #if CONFIG_RANDOMIZE_BASE | 68 | #if CONFIG_RANDOMIZE_BASE |
69 | /* aslr.c */ | 69 | /* kaslr.c */ |
70 | unsigned char *choose_kernel_location(struct boot_params *boot_params, | 70 | unsigned char *choose_random_location(unsigned long input_ptr, |
71 | unsigned char *input, | ||
72 | unsigned long input_size, | 71 | unsigned long input_size, |
73 | unsigned char *output, | 72 | unsigned long output_ptr, |
74 | unsigned long output_size); | 73 | unsigned long output_size); |
75 | /* cpuflags.c */ | 74 | /* cpuflags.c */ |
76 | bool has_cpuflag(int flag); | 75 | bool has_cpuflag(int flag); |
77 | #else | 76 | #else |
78 | static inline | 77 | static inline |
79 | unsigned char *choose_kernel_location(struct boot_params *boot_params, | 78 | unsigned char *choose_random_location(unsigned long input_ptr, |
80 | unsigned char *input, | ||
81 | unsigned long input_size, | 79 | unsigned long input_size, |
82 | unsigned char *output, | 80 | unsigned long output_ptr, |
83 | unsigned long output_size) | 81 | unsigned long output_size) |
84 | { | 82 | { |
85 | return output; | 83 | return (unsigned char *)output_ptr; |
86 | } | 84 | } |
87 | #endif | 85 | #endif |
88 | 86 | ||
87 | #ifdef CONFIG_X86_64 | ||
88 | void add_identity_map(unsigned long start, unsigned long size); | ||
89 | void finalize_identity_maps(void); | ||
90 | extern unsigned char _pgtable[]; | ||
91 | #else | ||
92 | static inline void add_identity_map(unsigned long start, unsigned long size) | ||
93 | { } | ||
94 | static inline void finalize_identity_maps(void) | ||
95 | { } | ||
96 | #endif | ||
97 | |||
89 | #ifdef CONFIG_EARLY_PRINTK | 98 | #ifdef CONFIG_EARLY_PRINTK |
90 | /* early_serial_console.c */ | 99 | /* early_serial_console.c */ |
91 | extern int early_serial_base; | 100 | extern int early_serial_base; |
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index d8222f213182..72bad2c8debe 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c | |||
@@ -18,11 +18,10 @@ | |||
18 | * | 18 | * |
19 | * H. Peter Anvin <hpa@linux.intel.com> | 19 | * H. Peter Anvin <hpa@linux.intel.com> |
20 | * | 20 | * |
21 | * ----------------------------------------------------------------------- */ | 21 | * ----------------------------------------------------------------------- |
22 | 22 | * | |
23 | /* | 23 | * Outputs a small assembly wrapper with the appropriate symbols defined. |
24 | * Compute the desired load offset from a compressed program; outputs | 24 | * |
25 | * a small assembly wrapper with the appropriate symbols defined. | ||
26 | */ | 25 | */ |
27 | 26 | ||
28 | #include <stdlib.h> | 27 | #include <stdlib.h> |
@@ -35,14 +34,11 @@ int main(int argc, char *argv[]) | |||
35 | { | 34 | { |
36 | uint32_t olen; | 35 | uint32_t olen; |
37 | long ilen; | 36 | long ilen; |
38 | unsigned long offs; | ||
39 | unsigned long run_size; | ||
40 | FILE *f = NULL; | 37 | FILE *f = NULL; |
41 | int retval = 1; | 38 | int retval = 1; |
42 | 39 | ||
43 | if (argc < 3) { | 40 | if (argc < 2) { |
44 | fprintf(stderr, "Usage: %s compressed_file run_size\n", | 41 | fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); |
45 | argv[0]); | ||
46 | goto bail; | 42 | goto bail; |
47 | } | 43 | } |
48 | 44 | ||
@@ -67,29 +63,11 @@ int main(int argc, char *argv[]) | |||
67 | ilen = ftell(f); | 63 | ilen = ftell(f); |
68 | olen = get_unaligned_le32(&olen); | 64 | olen = get_unaligned_le32(&olen); |
69 | 65 | ||
70 | /* | ||
71 | * Now we have the input (compressed) and output (uncompressed) | ||
72 | * sizes, compute the necessary decompression offset... | ||
73 | */ | ||
74 | |||
75 | offs = (olen > ilen) ? olen - ilen : 0; | ||
76 | offs += olen >> 12; /* Add 8 bytes for each 32K block */ | ||
77 | offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ | ||
78 | offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ | ||
79 | run_size = atoi(argv[2]); | ||
80 | |||
81 | printf(".section \".rodata..compressed\",\"a\",@progbits\n"); | 66 | printf(".section \".rodata..compressed\",\"a\",@progbits\n"); |
82 | printf(".globl z_input_len\n"); | 67 | printf(".globl z_input_len\n"); |
83 | printf("z_input_len = %lu\n", ilen); | 68 | printf("z_input_len = %lu\n", ilen); |
84 | printf(".globl z_output_len\n"); | 69 | printf(".globl z_output_len\n"); |
85 | printf("z_output_len = %lu\n", (unsigned long)olen); | 70 | printf("z_output_len = %lu\n", (unsigned long)olen); |
86 | printf(".globl z_extract_offset\n"); | ||
87 | printf("z_extract_offset = 0x%lx\n", offs); | ||
88 | /* z_extract_offset_negative allows simplification of head_32.S */ | ||
89 | printf(".globl z_extract_offset_negative\n"); | ||
90 | printf("z_extract_offset_negative = -0x%lx\n", offs); | ||
91 | printf(".globl z_run_size\n"); | ||
92 | printf("z_run_size = %lu\n", run_size); | ||
93 | 71 | ||
94 | printf(".globl input_data, input_data_end\n"); | 72 | printf(".globl input_data, input_data_end\n"); |
95 | printf("input_data:\n"); | 73 | printf("input_data:\n"); |
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c new file mode 100644 index 000000000000..34b95df14e69 --- /dev/null +++ b/arch/x86/boot/compressed/pagetable.c | |||
@@ -0,0 +1,129 @@ | |||
1 | /* | ||
2 | * This code is used on x86_64 to create page table identity mappings on | ||
3 | * demand by building up a new set of page tables (or appending to the | ||
4 | * existing ones), and then switching over to them when ready. | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * Since we're dealing with identity mappings, physical and virtual | ||
9 | * addresses are the same, so override these defines which are ultimately | ||
10 | * used by the headers in misc.h. | ||
11 | */ | ||
12 | #define __pa(x) ((unsigned long)(x)) | ||
13 | #define __va(x) ((void *)((unsigned long)(x))) | ||
14 | |||
15 | #include "misc.h" | ||
16 | |||
17 | /* These actually do the work of building the kernel identity maps. */ | ||
18 | #include <asm/init.h> | ||
19 | #include <asm/pgtable.h> | ||
20 | #include "../../mm/ident_map.c" | ||
21 | |||
22 | /* Used by pgtable.h asm code to force instruction serialization. */ | ||
23 | unsigned long __force_order; | ||
24 | |||
25 | /* Used to track our page table allocation area. */ | ||
26 | struct alloc_pgt_data { | ||
27 | unsigned char *pgt_buf; | ||
28 | unsigned long pgt_buf_size; | ||
29 | unsigned long pgt_buf_offset; | ||
30 | }; | ||
31 | |||
32 | /* | ||
33 | * Allocates space for a page table entry, using struct alloc_pgt_data | ||
34 | * above. Besides the local callers, this is used as the allocation | ||
35 | * callback in mapping_info below. | ||
36 | */ | ||
37 | static void *alloc_pgt_page(void *context) | ||
38 | { | ||
39 | struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context; | ||
40 | unsigned char *entry; | ||
41 | |||
42 | /* Validate there is space available for a new page. */ | ||
43 | if (pages->pgt_buf_offset >= pages->pgt_buf_size) { | ||
44 | debug_putstr("out of pgt_buf in " __FILE__ "!?\n"); | ||
45 | debug_putaddr(pages->pgt_buf_offset); | ||
46 | debug_putaddr(pages->pgt_buf_size); | ||
47 | return NULL; | ||
48 | } | ||
49 | |||
50 | entry = pages->pgt_buf + pages->pgt_buf_offset; | ||
51 | pages->pgt_buf_offset += PAGE_SIZE; | ||
52 | |||
53 | return entry; | ||
54 | } | ||
55 | |||
56 | /* Used to track our allocated page tables. */ | ||
57 | static struct alloc_pgt_data pgt_data; | ||
58 | |||
59 | /* The top level page table entry pointer. */ | ||
60 | static unsigned long level4p; | ||
61 | |||
62 | /* Locates and clears a region for a new top level page table. */ | ||
63 | static void prepare_level4(void) | ||
64 | { | ||
65 | /* | ||
66 | * It should be impossible for this not to already be true, | ||
67 | * but since calling this a second time would rewind the other | ||
68 | * counters, let's just make sure this is reset too. | ||
69 | */ | ||
70 | pgt_data.pgt_buf_offset = 0; | ||
71 | |||
72 | /* | ||
73 | * If we came here via startup_32(), cr3 will be _pgtable already | ||
74 | * and we must append to the existing area instead of entirely | ||
75 | * overwriting it. | ||
76 | */ | ||
77 | level4p = read_cr3(); | ||
78 | if (level4p == (unsigned long)_pgtable) { | ||
79 | debug_putstr("booted via startup_32()\n"); | ||
80 | pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; | ||
81 | pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; | ||
82 | memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); | ||
83 | } else { | ||
84 | debug_putstr("booted via startup_64()\n"); | ||
85 | pgt_data.pgt_buf = _pgtable; | ||
86 | pgt_data.pgt_buf_size = BOOT_PGT_SIZE; | ||
87 | memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); | ||
88 | level4p = (unsigned long)alloc_pgt_page(&pgt_data); | ||
89 | } | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * Adds the specified range to what will become the new identity mappings. | ||
94 | * Once all ranges have been added, the new mapping is activated by calling | ||
95 | * finalize_identity_maps() below. | ||
96 | */ | ||
97 | void add_identity_map(unsigned long start, unsigned long size) | ||
98 | { | ||
99 | struct x86_mapping_info mapping_info = { | ||
100 | .alloc_pgt_page = alloc_pgt_page, | ||
101 | .context = &pgt_data, | ||
102 | .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, | ||
103 | }; | ||
104 | unsigned long end = start + size; | ||
105 | |||
106 | /* Make sure we have a top level page table ready to use. */ | ||
107 | if (!level4p) | ||
108 | prepare_level4(); | ||
109 | |||
110 | /* Align boundary to 2M. */ | ||
111 | start = round_down(start, PMD_SIZE); | ||
112 | end = round_up(end, PMD_SIZE); | ||
113 | if (start >= end) | ||
114 | return; | ||
115 | |||
116 | /* Build the mapping. */ | ||
117 | kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p, | ||
118 | start, end); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * This switches the page tables to the new level4 that has been built | ||
123 | * via calls to add_identity_map() above. If booted via startup_32(), | ||
124 | * this is effectively a no-op. | ||
125 | */ | ||
126 | void finalize_identity_maps(void) | ||
127 | { | ||
128 | write_cr3(level4p); | ||
129 | } | ||
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index 00e788be1db9..cea140ce6b42 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c | |||
@@ -1,7 +1,16 @@ | |||
1 | /* | ||
2 | * This provides an optimized implementation of memcpy, and a simplified | ||
3 | * implementation of memset and memmove. These are used here because the | ||
4 | * standard kernel runtime versions are not yet available and we don't | ||
5 | * trust the gcc built-in implementations as they may do unexpected things | ||
6 | * (e.g. FPU ops) in the minimal decompression stub execution environment. | ||
7 | */ | ||
8 | #include "error.h" | ||
9 | |||
1 | #include "../string.c" | 10 | #include "../string.c" |
2 | 11 | ||
3 | #ifdef CONFIG_X86_32 | 12 | #ifdef CONFIG_X86_32 |
4 | void *memcpy(void *dest, const void *src, size_t n) | 13 | static void *__memcpy(void *dest, const void *src, size_t n) |
5 | { | 14 | { |
6 | int d0, d1, d2; | 15 | int d0, d1, d2; |
7 | asm volatile( | 16 | asm volatile( |
@@ -15,7 +24,7 @@ void *memcpy(void *dest, const void *src, size_t n) | |||
15 | return dest; | 24 | return dest; |
16 | } | 25 | } |
17 | #else | 26 | #else |
18 | void *memcpy(void *dest, const void *src, size_t n) | 27 | static void *__memcpy(void *dest, const void *src, size_t n) |
19 | { | 28 | { |
20 | long d0, d1, d2; | 29 | long d0, d1, d2; |
21 | asm volatile( | 30 | asm volatile( |
@@ -39,3 +48,27 @@ void *memset(void *s, int c, size_t n) | |||
39 | ss[i] = c; | 48 | ss[i] = c; |
40 | return s; | 49 | return s; |
41 | } | 50 | } |
51 | |||
52 | void *memmove(void *dest, const void *src, size_t n) | ||
53 | { | ||
54 | unsigned char *d = dest; | ||
55 | const unsigned char *s = src; | ||
56 | |||
57 | if (d <= s || d - s >= n) | ||
58 | return __memcpy(dest, src, n); | ||
59 | |||
60 | while (n-- > 0) | ||
61 | d[n] = s[n]; | ||
62 | |||
63 | return dest; | ||
64 | } | ||
65 | |||
66 | /* Detect and warn about potential overlaps, but handle them with memmove. */ | ||
67 | void *memcpy(void *dest, const void *src, size_t n) | ||
68 | { | ||
69 | if (dest > src && dest - src < n) { | ||
70 | warn("Avoiding potentially unsafe overlapping memcpy()!"); | ||
71 | return memmove(dest, src, n); | ||
72 | } | ||
73 | return __memcpy(dest, src, n); | ||
74 | } | ||
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 34d047c98284..e24e0a0c90c9 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S | |||
@@ -70,5 +70,6 @@ SECTIONS | |||
70 | _epgtable = . ; | 70 | _epgtable = . ; |
71 | } | 71 | } |
72 | #endif | 72 | #endif |
73 | . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */ | ||
73 | _end = .; | 74 | _end = .; |
74 | } | 75 | } |
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 45a07684bbab..f0b8d6d93164 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c | |||
@@ -1,3 +1,7 @@ | |||
1 | /* | ||
2 | * Serial port routines for use during early boot reporting. This code is | ||
3 | * included from both the compressed kernel and the regular kernel. | ||
4 | */ | ||
1 | #include "boot.h" | 5 | #include "boot.h" |
2 | 6 | ||
3 | #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ | 7 | #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ |
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6236b9ec4b76..3dd5be33aaa7 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -440,13 +440,116 @@ setup_data: .quad 0 # 64-bit physical pointer to | |||
440 | 440 | ||
441 | pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr | 441 | pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr |
442 | 442 | ||
443 | #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) | 443 | # |
444 | # Getting to provably safe in-place decompression is hard. Worst case | ||
445 | # behaviours need to be analyzed. Here let's take the decompression of | ||
446 | # a gzip-compressed kernel as example, to illustrate it: | ||
447 | # | ||
448 | # The file layout of gzip compressed kernel is: | ||
449 | # | ||
450 | # magic[2] | ||
451 | # method[1] | ||
452 | # flags[1] | ||
453 | # timestamp[4] | ||
454 | # extraflags[1] | ||
455 | # os[1] | ||
456 | # compressed data blocks[N] | ||
457 | # crc[4] orig_len[4] | ||
458 | # | ||
459 | # ... resulting in +18 bytes overhead of uncompressed data. | ||
460 | # | ||
461 | # (For more information, please refer to RFC 1951 and RFC 1952.) | ||
462 | # | ||
463 | # Files divided into blocks | ||
464 | # 1 bit (last block flag) | ||
465 | # 2 bits (block type) | ||
466 | # | ||
467 | # 1 block occurs every 32K -1 bytes or when there 50% compression | ||
468 | # has been achieved. The smallest block type encoding is always used. | ||
469 | # | ||
470 | # stored: | ||
471 | # 32 bits length in bytes. | ||
472 | # | ||
473 | # fixed: | ||
474 | # magic fixed tree. | ||
475 | # symbols. | ||
476 | # | ||
477 | # dynamic: | ||
478 | # dynamic tree encoding. | ||
479 | # symbols. | ||
480 | # | ||
481 | # | ||
482 | # The buffer for decompression in place is the length of the uncompressed | ||
483 | # data, plus a small amount extra to keep the algorithm safe. The | ||
484 | # compressed data is placed at the end of the buffer. The output pointer | ||
485 | # is placed at the start of the buffer and the input pointer is placed | ||
486 | # where the compressed data starts. Problems will occur when the output | ||
487 | # pointer overruns the input pointer. | ||
488 | # | ||
489 | # The output pointer can only overrun the input pointer if the input | ||
490 | # pointer is moving faster than the output pointer. A condition only | ||
491 | # triggered by data whose compressed form is larger than the uncompressed | ||
492 | # form. | ||
493 | # | ||
494 | # The worst case at the block level is a growth of the compressed data | ||
495 | # of 5 bytes per 32767 bytes. | ||
496 | # | ||
497 | # The worst case internal to a compressed block is very hard to figure. | ||
498 | # The worst case can at least be bounded by having one bit that represents | ||
499 | # 32764 bytes and then all of the rest of the bytes representing the very | ||
500 | # very last byte. | ||
501 | # | ||
502 | # All of which is enough to compute an amount of extra data that is required | ||
503 | # to be safe. To avoid problems at the block level allocating 5 extra bytes | ||
504 | # per 32767 bytes of data is sufficient. To avoid problems internal to a | ||
505 | # block adding an extra 32767 bytes (the worst case uncompressed block size) | ||
506 | # is sufficient, to ensure that in the worst case the decompressed data for | ||
507 | # block will stop the byte before the compressed data for a block begins. | ||
508 | # To avoid problems with the compressed data's meta information an extra 18 | ||
509 | # bytes are needed. Leading to the formula: | ||
510 | # | ||
511 | # extra_bytes = (uncompressed_size >> 12) + 32768 + 18 | ||
512 | # | ||
513 | # Adding 8 bytes per 32K is a bit excessive but much easier to calculate. | ||
514 | # Adding 32768 instead of 32767 just makes for round numbers. | ||
515 | # | ||
516 | # Above analysis is for decompressing gzip compressed kernel only. Up to | ||
517 | # now 6 different decompressor are supported all together. And among them | ||
518 | # xz stores data in chunks and has maximum chunk of 64K. Hence safety | ||
519 | # margin should be updated to cover all decompressors so that we don't | ||
520 | # need to deal with each of them separately. Please check | ||
521 | # the description in lib/decompressor_xxx.c for specific information. | ||
522 | # | ||
523 | # extra_bytes = (uncompressed_size >> 12) + 65536 + 128 | ||
524 | |||
525 | #define ZO_z_extra_bytes ((ZO_z_output_len >> 12) + 65536 + 128) | ||
526 | #if ZO_z_output_len > ZO_z_input_len | ||
527 | # define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \ | ||
528 | ZO_z_input_len) | ||
529 | #else | ||
530 | # define ZO_z_extract_offset ZO_z_extra_bytes | ||
531 | #endif | ||
532 | |||
533 | /* | ||
534 | * The extract_offset has to be bigger than ZO head section. Otherwise when | ||
535 | * the head code is running to move ZO to the end of the buffer, it will | ||
536 | * overwrite the head code itself. | ||
537 | */ | ||
538 | #if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset | ||
539 | # define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095) | ||
540 | #else | ||
541 | # define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095) | ||
542 | #endif | ||
543 | |||
544 | #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset) | ||
545 | |||
444 | #define VO_INIT_SIZE (VO__end - VO__text) | 546 | #define VO_INIT_SIZE (VO__end - VO__text) |
445 | #if ZO_INIT_SIZE > VO_INIT_SIZE | 547 | #if ZO_INIT_SIZE > VO_INIT_SIZE |
446 | #define INIT_SIZE ZO_INIT_SIZE | 548 | # define INIT_SIZE ZO_INIT_SIZE |
447 | #else | 549 | #else |
448 | #define INIT_SIZE VO_INIT_SIZE | 550 | # define INIT_SIZE VO_INIT_SIZE |
449 | #endif | 551 | #endif |
552 | |||
450 | init_size: .long INIT_SIZE # kernel initialization size | 553 | init_size: .long INIT_SIZE # kernel initialization size |
451 | handover_offset: .long 0 # Filled in by build.c | 554 | handover_offset: .long 0 # Filled in by build.c |
452 | 555 | ||
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6b8d6e8cd449..abd06b19ddd2 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
@@ -12,29 +12,46 @@ | |||
12 | 12 | ||
13 | /* Minimum kernel alignment, as a power of two */ | 13 | /* Minimum kernel alignment, as a power of two */ |
14 | #ifdef CONFIG_X86_64 | 14 | #ifdef CONFIG_X86_64 |
15 | #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT | 15 | # define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT |
16 | #else | 16 | #else |
17 | #define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER) | 17 | # define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER) |
18 | #endif | 18 | #endif |
19 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | 19 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) |
20 | 20 | ||
21 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ | 21 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ |
22 | (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) | 22 | (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) |
23 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" | 23 | # error "Invalid value for CONFIG_PHYSICAL_ALIGN" |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | #ifdef CONFIG_KERNEL_BZIP2 | 26 | #ifdef CONFIG_KERNEL_BZIP2 |
27 | #define BOOT_HEAP_SIZE 0x400000 | 27 | # define BOOT_HEAP_SIZE 0x400000 |
28 | #else /* !CONFIG_KERNEL_BZIP2 */ | 28 | #else /* !CONFIG_KERNEL_BZIP2 */ |
29 | 29 | # define BOOT_HEAP_SIZE 0x10000 | |
30 | #define BOOT_HEAP_SIZE 0x10000 | 30 | #endif |
31 | |||
32 | #endif /* !CONFIG_KERNEL_BZIP2 */ | ||
33 | 31 | ||
34 | #ifdef CONFIG_X86_64 | 32 | #ifdef CONFIG_X86_64 |
35 | #define BOOT_STACK_SIZE 0x4000 | 33 | # define BOOT_STACK_SIZE 0x4000 |
36 | #else | 34 | |
37 | #define BOOT_STACK_SIZE 0x1000 | 35 | # define BOOT_INIT_PGT_SIZE (6*4096) |
36 | # ifdef CONFIG_RANDOMIZE_BASE | ||
37 | /* | ||
38 | * Assuming all cross the 512GB boundary: | ||
39 | * 1 page for level4 | ||
40 | * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel | ||
41 | * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP). | ||
42 | * Total is 19 pages. | ||
43 | */ | ||
44 | # ifdef CONFIG_X86_VERBOSE_BOOTUP | ||
45 | # define BOOT_PGT_SIZE (19*4096) | ||
46 | # else /* !CONFIG_X86_VERBOSE_BOOTUP */ | ||
47 | # define BOOT_PGT_SIZE (17*4096) | ||
48 | # endif | ||
49 | # else /* !CONFIG_RANDOMIZE_BASE */ | ||
50 | # define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE | ||
51 | # endif | ||
52 | |||
53 | #else /* !CONFIG_X86_64 */ | ||
54 | # define BOOT_STACK_SIZE 0x1000 | ||
38 | #endif | 55 | #endif |
39 | 56 | ||
40 | #endif /* _ASM_X86_BOOT_H */ | 57 | #endif /* _ASM_X86_BOOT_H */ |
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 802dde30c928..cf8f619b305f 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h | |||
@@ -37,7 +37,10 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, | |||
37 | alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) | 37 | alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) |
38 | #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE | 38 | #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE |
39 | 39 | ||
40 | #ifndef __pa | ||
40 | #define __pa(x) __phys_addr((unsigned long)(x)) | 41 | #define __pa(x) __phys_addr((unsigned long)(x)) |
42 | #endif | ||
43 | |||
41 | #define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) | 44 | #define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) |
42 | /* __pa_symbol should be used for C visible symbols. | 45 | /* __pa_symbol should be used for C visible symbols. |
43 | This seems to be the official gcc blessed way to do such arithmetic. */ | 46 | This seems to be the official gcc blessed way to do such arithmetic. */ |
@@ -51,7 +54,9 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, | |||
51 | #define __pa_symbol(x) \ | 54 | #define __pa_symbol(x) \ |
52 | __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) | 55 | __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) |
53 | 56 | ||
57 | #ifndef __va | ||
54 | #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) | 58 | #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) |
59 | #endif | ||
55 | 60 | ||
56 | #define __boot_va(x) __va(x) | 61 | #define __boot_va(x) __va(x) |
57 | #define __boot_pa(x) __pa(x) | 62 | #define __boot_pa(x) __pa(x) |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 4928cf0d5af0..d5c2f8b40faa 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -47,12 +47,10 @@ | |||
47 | * are fully set up. If kernel ASLR is configured, it can extend the | 47 | * are fully set up. If kernel ASLR is configured, it can extend the |
48 | * kernel page table mapping, reducing the size of the modules area. | 48 | * kernel page table mapping, reducing the size of the modules area. |
49 | */ | 49 | */ |
50 | #define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) | 50 | #if defined(CONFIG_RANDOMIZE_BASE) |
51 | #if defined(CONFIG_RANDOMIZE_BASE) && \ | 51 | #define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024) |
52 | CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT | ||
53 | #define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET | ||
54 | #else | 52 | #else |
55 | #define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT | 53 | #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) |
56 | #endif | 54 | #endif |
57 | 55 | ||
58 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ | 56 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 3c731413f1de..2970d22d7766 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -15,17 +15,6 @@ | |||
15 | #include <linux/cpumask.h> | 15 | #include <linux/cpumask.h> |
16 | #include <asm/frame.h> | 16 | #include <asm/frame.h> |
17 | 17 | ||
18 | static inline int paravirt_enabled(void) | ||
19 | { | ||
20 | return pv_info.paravirt_enabled; | ||
21 | } | ||
22 | |||
23 | static inline int paravirt_has_feature(unsigned int feature) | ||
24 | { | ||
25 | WARN_ON_ONCE(!pv_info.paravirt_enabled); | ||
26 | return (pv_info.features & feature); | ||
27 | } | ||
28 | |||
29 | static inline void load_sp0(struct tss_struct *tss, | 18 | static inline void load_sp0(struct tss_struct *tss, |
30 | struct thread_struct *thread) | 19 | struct thread_struct *thread) |
31 | { | 20 | { |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index b4a23eafa1b9..7fa9e7740ba3 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -69,15 +69,9 @@ struct pv_info { | |||
69 | u16 extra_user_64bit_cs; /* __USER_CS if none */ | 69 | u16 extra_user_64bit_cs; /* __USER_CS if none */ |
70 | #endif | 70 | #endif |
71 | 71 | ||
72 | int paravirt_enabled; | ||
73 | unsigned int features; /* valid only if paravirt_enabled is set */ | ||
74 | const char *name; | 72 | const char *name; |
75 | }; | 73 | }; |
76 | 74 | ||
77 | #define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x) | ||
78 | /* Supported features */ | ||
79 | #define PV_SUPPORTED_RTC (1<<0) | ||
80 | |||
81 | struct pv_init_ops { | 75 | struct pv_init_ops { |
82 | /* | 76 | /* |
83 | * Patch may replace one of the defined code sequences with | 77 | * Patch may replace one of the defined code sequences with |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9251aa962721..62c6cc3cc5d3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -480,8 +480,6 @@ static inline unsigned long current_top_of_stack(void) | |||
480 | #include <asm/paravirt.h> | 480 | #include <asm/paravirt.h> |
481 | #else | 481 | #else |
482 | #define __cpuid native_cpuid | 482 | #define __cpuid native_cpuid |
483 | #define paravirt_enabled() 0 | ||
484 | #define paravirt_has(x) 0 | ||
485 | 483 | ||
486 | static inline void load_sp0(struct tss_struct *tss, | 484 | static inline void load_sp0(struct tss_struct *tss, |
487 | struct thread_struct *thread) | 485 | struct thread_struct *thread) |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1ae89a2721d6..4dcdf74dfed8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -142,6 +142,44 @@ struct x86_cpuinit_ops { | |||
142 | struct timespec; | 142 | struct timespec; |
143 | 143 | ||
144 | /** | 144 | /** |
145 | * struct x86_legacy_devices - legacy x86 devices | ||
146 | * | ||
147 | * @pnpbios: this platform can have a PNPBIOS. If this is disabled the platform | ||
148 | * is known to never have a PNPBIOS. | ||
149 | * | ||
150 | * These are devices known to require LPC or ISA bus. The definition of legacy | ||
151 | * devices adheres to the ACPI 5.2.9.3 IA-PC Boot Architecture flag | ||
152 | * ACPI_FADT_LEGACY_DEVICES. These devices consist of user visible devices on | ||
153 | * the LPC or ISA bus. User visible devices are devices that have end-user | ||
154 | * accessible connectors (for example, LPT parallel port). Legacy devices on | ||
155 | * the LPC bus consist for example of serial and parallel ports, PS/2 keyboard | ||
156 | * / mouse, and the floppy disk controller. A system that lacks all known | ||
157 | * legacy devices can assume all devices can be detected exclusively via | ||
158 | * standard device enumeration mechanisms including the ACPI namespace. | ||
159 | * | ||
160 | * A system which does not have ACPI_FADT_LEGACY_DEVICES enabled must not | ||
161 | * have any of the legacy devices enumerated below present. | ||
162 | */ | ||
163 | struct x86_legacy_devices { | ||
164 | int pnpbios; | ||
165 | }; | ||
166 | |||
167 | /** | ||
168 | * struct x86_legacy_features - legacy x86 features | ||
169 | * | ||
170 | * @rtc: this device has a CMOS real-time clock present | ||
171 | * @ebda_search: it's safe to search for the EBDA signature in the hardware's | ||
172 | * low RAM | ||
173 | * @devices: legacy x86 devices, refer to struct x86_legacy_devices | ||
174 | * documentation for further details. | ||
175 | */ | ||
176 | struct x86_legacy_features { | ||
177 | int rtc; | ||
178 | int ebda_search; | ||
179 | struct x86_legacy_devices devices; | ||
180 | }; | ||
181 | |||
182 | /** | ||
145 | * struct x86_platform_ops - platform specific runtime functions | 183 | * struct x86_platform_ops - platform specific runtime functions |
146 | * @calibrate_tsc: calibrate TSC | 184 | * @calibrate_tsc: calibrate TSC |
147 | * @get_wallclock: get time from HW clock like RTC etc. | 185 | * @get_wallclock: get time from HW clock like RTC etc. |
@@ -152,6 +190,14 @@ struct timespec; | |||
152 | * @save_sched_clock_state: save state for sched_clock() on suspend | 190 | * @save_sched_clock_state: save state for sched_clock() on suspend |
153 | * @restore_sched_clock_state: restore state for sched_clock() on resume | 191 | * @restore_sched_clock_state: restore state for sched_clock() on resume |
154 | * @apic_post_init: adjust apic if needed | 192 | * @apic_post_init: adjust apic if needed |
193 | * @legacy: legacy features | ||
194 | * @set_legacy_features: override legacy features. Use of this callback | ||
195 | * is highly discouraged. You should only need | ||
196 | * this if your hardware platform requires further | ||
197 | * custom fine tuning far beyond what may be | ||
198 | * possible in x86_early_init_platform_quirks() by | ||
199 | * only using the current x86_hardware_subarch | ||
200 | * semantics. | ||
155 | */ | 201 | */ |
156 | struct x86_platform_ops { | 202 | struct x86_platform_ops { |
157 | unsigned long (*calibrate_tsc)(void); | 203 | unsigned long (*calibrate_tsc)(void); |
@@ -165,6 +211,8 @@ struct x86_platform_ops { | |||
165 | void (*save_sched_clock_state)(void); | 211 | void (*save_sched_clock_state)(void); |
166 | void (*restore_sched_clock_state)(void); | 212 | void (*restore_sched_clock_state)(void); |
167 | void (*apic_post_init)(void); | 213 | void (*apic_post_init)(void); |
214 | struct x86_legacy_features legacy; | ||
215 | void (*set_legacy_features)(void); | ||
168 | }; | 216 | }; |
169 | 217 | ||
170 | struct pci_dev; | 218 | struct pci_dev; |
@@ -186,6 +234,8 @@ extern struct x86_cpuinit_ops x86_cpuinit; | |||
186 | extern struct x86_platform_ops x86_platform; | 234 | extern struct x86_platform_ops x86_platform; |
187 | extern struct x86_msi_ops x86_msi; | 235 | extern struct x86_msi_ops x86_msi; |
188 | extern struct x86_io_apic_ops x86_io_apic_ops; | 236 | extern struct x86_io_apic_ops x86_io_apic_ops; |
237 | |||
238 | extern void x86_early_init_platform_quirks(void); | ||
189 | extern void x86_init_noop(void); | 239 | extern void x86_init_noop(void); |
190 | extern void x86_init_uint_noop(unsigned int unused); | 240 | extern void x86_init_uint_noop(unsigned int unused); |
191 | 241 | ||
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 329254373479..c18ce67495fa 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h | |||
@@ -157,7 +157,46 @@ struct boot_params { | |||
157 | __u8 _pad9[276]; /* 0xeec */ | 157 | __u8 _pad9[276]; /* 0xeec */ |
158 | } __attribute__((packed)); | 158 | } __attribute__((packed)); |
159 | 159 | ||
160 | enum { | 160 | /** |
161 | * enum x86_hardware_subarch - x86 hardware subarchitecture | ||
162 | * | ||
163 | * The x86 hardware_subarch and hardware_subarch_data were added as of the x86 | ||
164 | * boot protocol 2.07 to help distinguish and support custom x86 boot | ||
165 | * sequences. This enum represents accepted values for the x86 | ||
166 | * hardware_subarch. Custom x86 boot sequences (not X86_SUBARCH_PC) do not | ||
167 | * have or simply *cannot* make use of natural stubs like BIOS or EFI, the | ||
168 | * hardware_subarch can be used on the Linux entry path to revector to a | ||
169 | * subarchitecture stub when needed. This subarchitecture stub can be used to | ||
170 | * set up Linux boot parameters or for special care to account for nonstandard | ||
171 | * handling of page tables. | ||
172 | * | ||
173 | * These enums should only ever be used by x86 code, and the code that uses | ||
174 | * it should be well contained and compartmentalized. | ||
175 | * | ||
176 | * KVM and Xen HVM do not have a subarch as these are expected to follow | ||
177 | * standard x86 boot entries. If there is a genuine need for "hypervisor" type | ||
178 | * that should be considered separately in the future. Future guest types | ||
179 | * should seriously consider working with standard x86 boot stubs such as | ||
180 | * the BIOS or EFI boot stubs. | ||
181 | * | ||
182 | * WARNING: this enum is only used for legacy hacks, for platform features that | ||
183 | * are not easily enumerated or discoverable. You should not ever use | ||
184 | * this for new features. | ||
185 | * | ||
186 | * @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard | ||
187 | * PC mechanisms (PCI, ACPI) and doesn't need a special boot flow. | ||
188 | * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest | ||
189 | * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path, | ||
190 | * which start at asm startup_xen() entry point and later jump to the C | ||
191 | * xen_start_kernel() entry point. Both domU and dom0 type of guests are | ||
192 | * currently supported through this PV boot path. | ||
193 | * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform | ||
194 | * systems which do not have the PCI legacy interfaces. | ||
195 | * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC for | ||
196 | * settop boxes and media devices, the use of a subarch for CE4100 | ||
197 | * is more of a hack... | ||
198 | */ | ||
199 | enum x86_hardware_subarch { | ||
161 | X86_SUBARCH_PC = 0, | 200 | X86_SUBARCH_PC = 0, |
162 | X86_SUBARCH_LGUEST, | 201 | X86_SUBARCH_LGUEST, |
163 | X86_SUBARCH_XEN, | 202 | X86_SUBARCH_XEN, |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 616ebd22ef9a..9abf8551c7e4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -2,7 +2,11 @@ | |||
2 | # Makefile for the linux kernel. | 2 | # Makefile for the linux kernel. |
3 | # | 3 | # |
4 | 4 | ||
5 | extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds | 5 | extra-y := head_$(BITS).o |
6 | extra-y += head$(BITS).o | ||
7 | extra-y += ebda.o | ||
8 | extra-y += platform-quirks.o | ||
9 | extra-y += vmlinux.lds | ||
6 | 10 | ||
7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | 11 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) |
8 | 12 | ||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2522e564269e..f115a58f7c84 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -913,6 +913,15 @@ late_initcall(hpet_insert_resource); | |||
913 | 913 | ||
914 | static int __init acpi_parse_fadt(struct acpi_table_header *table) | 914 | static int __init acpi_parse_fadt(struct acpi_table_header *table) |
915 | { | 915 | { |
916 | if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) { | ||
917 | pr_debug("ACPI: no legacy devices present\n"); | ||
918 | x86_platform.legacy.devices.pnpbios = 0; | ||
919 | } | ||
920 | |||
921 | if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) { | ||
922 | pr_debug("ACPI: not registering RTC platform device\n"); | ||
923 | x86_platform.legacy.rtc = 0; | ||
924 | } | ||
916 | 925 | ||
917 | #ifdef CONFIG_X86_PM_TIMER | 926 | #ifdef CONFIG_X86_PM_TIMER |
918 | /* detect the location of the ACPI PM Timer */ | 927 | /* detect the location of the ACPI PM Timer */ |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9307f182fe30..c7364bd633e1 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -2267,7 +2267,7 @@ static int __init apm_init(void) | |||
2267 | 2267 | ||
2268 | dmi_check_system(apm_dmi_table); | 2268 | dmi_check_system(apm_dmi_table); |
2269 | 2269 | ||
2270 | if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { | 2270 | if (apm_info.bios.version == 0 || machine_is_olpc()) { |
2271 | printk(KERN_INFO "apm: BIOS not found.\n"); | 2271 | printk(KERN_INFO "apm: BIOS not found.\n"); |
2272 | return -ENODEV; | 2272 | return -ENODEV; |
2273 | } | 2273 | } |
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 5c042466f274..674134e9f5e5 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c | |||
@@ -80,6 +80,7 @@ void common(void) { | |||
80 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 80 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
81 | OFFSET(BP_version, boot_params, hdr.version); | 81 | OFFSET(BP_version, boot_params, hdr.version); |
82 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | 82 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); |
83 | OFFSET(BP_init_size, boot_params, hdr.init_size); | ||
83 | OFFSET(BP_pref_address, boot_params, hdr.pref_address); | 84 | OFFSET(BP_pref_address, boot_params, hdr.pref_address); |
84 | OFFSET(BP_code32_start, boot_params, hdr.code32_start); | 85 | OFFSET(BP_code32_start, boot_params, hdr.code32_start); |
85 | 86 | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b18f4706e607..8dae51fd3db1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -233,7 +233,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) | |||
233 | * The Quark is also family 5, but does not have the same bug. | 233 | * The Quark is also family 5, but does not have the same bug. |
234 | */ | 234 | */ |
235 | clear_cpu_bug(c, X86_BUG_F00F); | 235 | clear_cpu_bug(c, X86_BUG_F00F); |
236 | if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) { | 236 | if (c->x86 == 5 && c->x86_model < 9) { |
237 | static int f00f_workaround_enabled; | 237 | static int f00f_workaround_enabled; |
238 | 238 | ||
239 | set_cpu_bug(c, X86_BUG_F00F); | 239 | set_cpu_bug(c, X86_BUG_F00F); |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/ebda.c index 992f442ca155..afe65dffee80 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/ebda.c | |||
@@ -38,7 +38,7 @@ void __init reserve_ebda_region(void) | |||
38 | * that the paravirt case can handle memory setup | 38 | * that the paravirt case can handle memory setup |
39 | * correctly, without our help. | 39 | * correctly, without our help. |
40 | */ | 40 | */ |
41 | if (paravirt_enabled()) | 41 | if (!x86_platform.legacy.ebda_search) |
42 | return; | 42 | return; |
43 | 43 | ||
44 | /* end of low (conventional) memory */ | 44 | /* end of low (conventional) memory */ |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 2911ef3a9f1c..d784bb547a9d 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -34,6 +34,8 @@ asmlinkage __visible void __init i386_start_kernel(void) | |||
34 | cr4_init_shadow(); | 34 | cr4_init_shadow(); |
35 | sanitize_boot_params(&boot_params); | 35 | sanitize_boot_params(&boot_params); |
36 | 36 | ||
37 | x86_early_init_platform_quirks(); | ||
38 | |||
37 | /* Call the subarch specific early setup function */ | 39 | /* Call the subarch specific early setup function */ |
38 | switch (boot_params.hdr.hardware_subarch) { | 40 | switch (boot_params.hdr.hardware_subarch) { |
39 | case X86_SUBARCH_INTEL_MID: | 41 | case X86_SUBARCH_INTEL_MID: |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1f4422d5c8d0..b72fb0b71dd1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -182,6 +182,7 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
182 | if (!boot_params.hdr.version) | 182 | if (!boot_params.hdr.version) |
183 | copy_bootdata(__va(real_mode_data)); | 183 | copy_bootdata(__va(real_mode_data)); |
184 | 184 | ||
185 | x86_early_init_platform_quirks(); | ||
185 | reserve_ebda_region(); | 186 | reserve_ebda_region(); |
186 | 187 | ||
187 | switch (boot_params.hdr.hardware_subarch) { | 188 | switch (boot_params.hdr.hardware_subarch) { |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index dc1207e2f193..eea2a6f72b31 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -285,14 +285,6 @@ static void __init paravirt_ops_setup(void) | |||
285 | { | 285 | { |
286 | pv_info.name = "KVM"; | 286 | pv_info.name = "KVM"; |
287 | 287 | ||
288 | /* | ||
289 | * KVM isn't paravirt in the sense of paravirt_enabled. A KVM | ||
290 | * guest kernel works like a bare metal kernel with additional | ||
291 | * features, and paravirt_enabled is about features that are | ||
292 | * missing. | ||
293 | */ | ||
294 | pv_info.paravirt_enabled = 0; | ||
295 | |||
296 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | 288 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) |
297 | pv_cpu_ops.io_delay = kvm_io_delay; | 289 | pv_cpu_ops.io_delay = kvm_io_delay; |
298 | 290 | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f9583917c7c4..7b3b3f24c3ea 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -294,7 +294,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
294 | 294 | ||
295 | struct pv_info pv_info = { | 295 | struct pv_info pv_info = { |
296 | .name = "bare hardware", | 296 | .name = "bare hardware", |
297 | .paravirt_enabled = 0, | ||
298 | .kernel_rpl = 0, | 297 | .kernel_rpl = 0, |
299 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 298 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
300 | 299 | ||
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c new file mode 100644 index 000000000000..b2f8a33b36ff --- /dev/null +++ b/arch/x86/kernel/platform-quirks.c | |||
@@ -0,0 +1,35 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | |||
4 | #include <asm/setup.h> | ||
5 | #include <asm/bios_ebda.h> | ||
6 | |||
7 | void __init x86_early_init_platform_quirks(void) | ||
8 | { | ||
9 | x86_platform.legacy.rtc = 1; | ||
10 | x86_platform.legacy.ebda_search = 0; | ||
11 | x86_platform.legacy.devices.pnpbios = 1; | ||
12 | |||
13 | switch (boot_params.hdr.hardware_subarch) { | ||
14 | case X86_SUBARCH_PC: | ||
15 | x86_platform.legacy.ebda_search = 1; | ||
16 | break; | ||
17 | case X86_SUBARCH_XEN: | ||
18 | case X86_SUBARCH_LGUEST: | ||
19 | case X86_SUBARCH_INTEL_MID: | ||
20 | case X86_SUBARCH_CE4100: | ||
21 | x86_platform.legacy.devices.pnpbios = 0; | ||
22 | x86_platform.legacy.rtc = 0; | ||
23 | break; | ||
24 | } | ||
25 | |||
26 | if (x86_platform.set_legacy_features) | ||
27 | x86_platform.set_legacy_features(); | ||
28 | } | ||
29 | |||
30 | #if defined(CONFIG_PNPBIOS) | ||
31 | bool __init arch_pnpbios_disabled(void) | ||
32 | { | ||
33 | return x86_platform.legacy.devices.pnpbios == 0; | ||
34 | } | ||
35 | #endif | ||
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 4af8d063fb36..eceaa082ec3f 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/time.h> | 14 | #include <asm/time.h> |
15 | #include <asm/intel-mid.h> | 15 | #include <asm/intel-mid.h> |
16 | #include <asm/rtc.h> | 16 | #include <asm/rtc.h> |
17 | #include <asm/setup.h> | ||
17 | 18 | ||
18 | #ifdef CONFIG_X86_32 | 19 | #ifdef CONFIG_X86_32 |
19 | /* | 20 | /* |
@@ -185,22 +186,7 @@ static __init int add_rtc_cmos(void) | |||
185 | } | 186 | } |
186 | } | 187 | } |
187 | #endif | 188 | #endif |
188 | if (of_have_populated_dt()) | 189 | if (!x86_platform.legacy.rtc) |
189 | return 0; | ||
190 | |||
191 | /* Intel MID platforms don't have ioport rtc */ | ||
192 | if (intel_mid_identify_cpu()) | ||
193 | return -ENODEV; | ||
194 | |||
195 | #ifdef CONFIG_ACPI | ||
196 | if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) { | ||
197 | /* This warning can likely go away again in a year or two. */ | ||
198 | pr_info("ACPI: not registering RTC platform device\n"); | ||
199 | return -ENODEV; | ||
200 | } | ||
201 | #endif | ||
202 | |||
203 | if (paravirt_enabled() && !paravirt_has(RTC)) | ||
204 | return -ENODEV; | 190 | return -ENODEV; |
205 | 191 | ||
206 | platform_device_register(&rtc_device); | 192 | platform_device_register(&rtc_device); |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index e72a07f20b05..9b0185fbe3eb 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -74,12 +74,6 @@ void __init tboot_probe(void) | |||
74 | return; | 74 | return; |
75 | } | 75 | } |
76 | 76 | ||
77 | /* only a natively booted kernel should be using TXT */ | ||
78 | if (paravirt_enabled()) { | ||
79 | pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); | ||
80 | return; | ||
81 | } | ||
82 | |||
83 | /* Map and check for tboot UUID. */ | 77 | /* Map and check for tboot UUID. */ |
84 | set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); | 78 | set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); |
85 | tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); | 79 | tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4c941f88d405..9297a002d8e5 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -334,7 +334,7 @@ SECTIONS | |||
334 | __brk_limit = .; | 334 | __brk_limit = .; |
335 | } | 335 | } |
336 | 336 | ||
337 | . = ALIGN(PAGE_SIZE); | 337 | . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */ |
338 | _end = .; | 338 | _end = .; |
339 | 339 | ||
340 | STABS_DEBUG | 340 | STABS_DEBUG |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index fd57d3ae7e16..3847e736702e 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -1408,13 +1408,10 @@ __init void lguest_init(void) | |||
1408 | { | 1408 | { |
1409 | /* We're under lguest. */ | 1409 | /* We're under lguest. */ |
1410 | pv_info.name = "lguest"; | 1410 | pv_info.name = "lguest"; |
1411 | /* Paravirt is enabled. */ | ||
1412 | pv_info.paravirt_enabled = 1; | ||
1413 | /* We're running at privilege level 1, not 0 as normal. */ | 1411 | /* We're running at privilege level 1, not 0 as normal. */ |
1414 | pv_info.kernel_rpl = 1; | 1412 | pv_info.kernel_rpl = 1; |
1415 | /* Everyone except Xen runs with this set. */ | 1413 | /* Everyone except Xen runs with this set. */ |
1416 | pv_info.shared_kernel_pmd = 1; | 1414 | pv_info.shared_kernel_pmd = 1; |
1417 | pv_info.features = 0; | ||
1418 | 1415 | ||
1419 | /* | 1416 | /* |
1420 | * We set up all the lguest overrides for sensitive operations. These | 1417 | * We set up all the lguest overrides for sensitive operations. These |
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c new file mode 100644 index 000000000000..ec21796ac5fd --- /dev/null +++ b/arch/x86/mm/ident_map.c | |||
@@ -0,0 +1,79 @@ | |||
1 | /* | ||
2 | * Helper routines for building identity mapping page tables. This is | ||
3 | * included by both the compressed kernel and the regular kernel. | ||
4 | */ | ||
5 | |||
6 | static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, | ||
7 | unsigned long addr, unsigned long end) | ||
8 | { | ||
9 | addr &= PMD_MASK; | ||
10 | for (; addr < end; addr += PMD_SIZE) { | ||
11 | pmd_t *pmd = pmd_page + pmd_index(addr); | ||
12 | |||
13 | if (!pmd_present(*pmd)) | ||
14 | set_pmd(pmd, __pmd(addr | pmd_flag)); | ||
15 | } | ||
16 | } | ||
17 | |||
18 | static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, | ||
19 | unsigned long addr, unsigned long end) | ||
20 | { | ||
21 | unsigned long next; | ||
22 | |||
23 | for (; addr < end; addr = next) { | ||
24 | pud_t *pud = pud_page + pud_index(addr); | ||
25 | pmd_t *pmd; | ||
26 | |||
27 | next = (addr & PUD_MASK) + PUD_SIZE; | ||
28 | if (next > end) | ||
29 | next = end; | ||
30 | |||
31 | if (pud_present(*pud)) { | ||
32 | pmd = pmd_offset(pud, 0); | ||
33 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
34 | continue; | ||
35 | } | ||
36 | pmd = (pmd_t *)info->alloc_pgt_page(info->context); | ||
37 | if (!pmd) | ||
38 | return -ENOMEM; | ||
39 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
40 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
41 | } | ||
42 | |||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | ||
47 | unsigned long addr, unsigned long end) | ||
48 | { | ||
49 | unsigned long next; | ||
50 | int result; | ||
51 | int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; | ||
52 | |||
53 | for (; addr < end; addr = next) { | ||
54 | pgd_t *pgd = pgd_page + pgd_index(addr) + off; | ||
55 | pud_t *pud; | ||
56 | |||
57 | next = (addr & PGDIR_MASK) + PGDIR_SIZE; | ||
58 | if (next > end) | ||
59 | next = end; | ||
60 | |||
61 | if (pgd_present(*pgd)) { | ||
62 | pud = pud_offset(pgd, 0); | ||
63 | result = ident_pud_init(info, pud, addr, next); | ||
64 | if (result) | ||
65 | return result; | ||
66 | continue; | ||
67 | } | ||
68 | |||
69 | pud = (pud_t *)info->alloc_pgt_page(info->context); | ||
70 | if (!pud) | ||
71 | return -ENOMEM; | ||
72 | result = ident_pud_init(info, pud, addr, next); | ||
73 | if (result) | ||
74 | return result; | ||
75 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
76 | } | ||
77 | |||
78 | return 0; | ||
79 | } | ||
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 85af914e3d27..84df150ee77e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -804,9 +804,6 @@ void __init mem_init(void) | |||
804 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); | 804 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); |
805 | #undef high_memory | 805 | #undef high_memory |
806 | #undef __FIXADDR_TOP | 806 | #undef __FIXADDR_TOP |
807 | #ifdef CONFIG_RANDOMIZE_BASE | ||
808 | BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); | ||
809 | #endif | ||
810 | 807 | ||
811 | #ifdef CONFIG_HIGHMEM | 808 | #ifdef CONFIG_HIGHMEM |
812 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | 809 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 89d97477c1d9..bce2e5d9edd4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -58,79 +58,7 @@ | |||
58 | 58 | ||
59 | #include "mm_internal.h" | 59 | #include "mm_internal.h" |
60 | 60 | ||
61 | static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, | 61 | #include "ident_map.c" |
62 | unsigned long addr, unsigned long end) | ||
63 | { | ||
64 | addr &= PMD_MASK; | ||
65 | for (; addr < end; addr += PMD_SIZE) { | ||
66 | pmd_t *pmd = pmd_page + pmd_index(addr); | ||
67 | |||
68 | if (!pmd_present(*pmd)) | ||
69 | set_pmd(pmd, __pmd(addr | pmd_flag)); | ||
70 | } | ||
71 | } | ||
72 | static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, | ||
73 | unsigned long addr, unsigned long end) | ||
74 | { | ||
75 | unsigned long next; | ||
76 | |||
77 | for (; addr < end; addr = next) { | ||
78 | pud_t *pud = pud_page + pud_index(addr); | ||
79 | pmd_t *pmd; | ||
80 | |||
81 | next = (addr & PUD_MASK) + PUD_SIZE; | ||
82 | if (next > end) | ||
83 | next = end; | ||
84 | |||
85 | if (pud_present(*pud)) { | ||
86 | pmd = pmd_offset(pud, 0); | ||
87 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
88 | continue; | ||
89 | } | ||
90 | pmd = (pmd_t *)info->alloc_pgt_page(info->context); | ||
91 | if (!pmd) | ||
92 | return -ENOMEM; | ||
93 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
94 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
95 | } | ||
96 | |||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | ||
101 | unsigned long addr, unsigned long end) | ||
102 | { | ||
103 | unsigned long next; | ||
104 | int result; | ||
105 | int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; | ||
106 | |||
107 | for (; addr < end; addr = next) { | ||
108 | pgd_t *pgd = pgd_page + pgd_index(addr) + off; | ||
109 | pud_t *pud; | ||
110 | |||
111 | next = (addr & PGDIR_MASK) + PGDIR_SIZE; | ||
112 | if (next > end) | ||
113 | next = end; | ||
114 | |||
115 | if (pgd_present(*pgd)) { | ||
116 | pud = pud_offset(pgd, 0); | ||
117 | result = ident_pud_init(info, pud, addr, next); | ||
118 | if (result) | ||
119 | return result; | ||
120 | continue; | ||
121 | } | ||
122 | |||
123 | pud = (pud_t *)info->alloc_pgt_page(info->context); | ||
124 | if (!pud) | ||
125 | return -ENOMEM; | ||
126 | result = ident_pud_init(info, pud, addr, next); | ||
127 | if (result) | ||
128 | return result; | ||
129 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
130 | } | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | 62 | ||
135 | /* | 63 | /* |
136 | * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the | 64 | * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the |
diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh deleted file mode 100644 index 1a4c17bb3910..000000000000 --- a/arch/x86/tools/calc_run_size.sh +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | #!/bin/sh | ||
2 | # | ||
3 | # Calculate the amount of space needed to run the kernel, including room for | ||
4 | # the .bss and .brk sections. | ||
5 | # | ||
6 | # Usage: | ||
7 | # objdump -h a.out | sh calc_run_size.sh | ||
8 | |||
9 | NUM='\([0-9a-fA-F]*[ \t]*\)' | ||
10 | OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p') | ||
11 | if [ -z "$OUT" ] ; then | ||
12 | echo "Never found .bss or .brk file offset" >&2 | ||
13 | exit 1 | ||
14 | fi | ||
15 | |||
16 | OUT=$(echo ${OUT# }) | ||
17 | sizeA=$(printf "%d" 0x${OUT%% *}) | ||
18 | OUT=${OUT#* } | ||
19 | offsetA=$(printf "%d" 0x${OUT%% *}) | ||
20 | OUT=${OUT#* } | ||
21 | sizeB=$(printf "%d" 0x${OUT%% *}) | ||
22 | OUT=${OUT#* } | ||
23 | offsetB=$(printf "%d" 0x${OUT%% *}) | ||
24 | |||
25 | run_size=$(( $offsetA + $sizeA + $sizeB )) | ||
26 | |||
27 | # BFD linker shows the same file offset in ELF. | ||
28 | if [ "$offsetA" -ne "$offsetB" ] ; then | ||
29 | # Gold linker shows them as consecutive. | ||
30 | endB=$(( $offsetB + $sizeB )) | ||
31 | if [ "$endB" != "$run_size" ] ; then | ||
32 | printf "sizeA: 0x%x\n" $sizeA >&2 | ||
33 | printf "offsetA: 0x%x\n" $offsetA >&2 | ||
34 | printf "sizeB: 0x%x\n" $sizeB >&2 | ||
35 | printf "offsetB: 0x%x\n" $offsetB >&2 | ||
36 | echo ".bss and .brk are non-contiguous" >&2 | ||
37 | exit 1 | ||
38 | fi | ||
39 | fi | ||
40 | |||
41 | printf "%d\n" $run_size | ||
42 | exit 0 | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 6ab672233ac9..760789ae8562 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1206,13 +1206,11 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | |||
1206 | } | 1206 | } |
1207 | 1207 | ||
1208 | static const struct pv_info xen_info __initconst = { | 1208 | static const struct pv_info xen_info __initconst = { |
1209 | .paravirt_enabled = 1, | ||
1210 | .shared_kernel_pmd = 0, | 1209 | .shared_kernel_pmd = 0, |
1211 | 1210 | ||
1212 | #ifdef CONFIG_X86_64 | 1211 | #ifdef CONFIG_X86_64 |
1213 | .extra_user_64bit_cs = FLAT_USER_CS64, | 1212 | .extra_user_64bit_cs = FLAT_USER_CS64, |
1214 | #endif | 1213 | #endif |
1215 | .features = 0, | ||
1216 | .name = "Xen", | 1214 | .name = "Xen", |
1217 | }; | 1215 | }; |
1218 | 1216 | ||
@@ -1528,6 +1526,11 @@ static void __init xen_pvh_early_guest_init(void) | |||
1528 | } | 1526 | } |
1529 | #endif /* CONFIG_XEN_PVH */ | 1527 | #endif /* CONFIG_XEN_PVH */ |
1530 | 1528 | ||
1529 | static void __init xen_dom0_set_legacy_features(void) | ||
1530 | { | ||
1531 | x86_platform.legacy.rtc = 1; | ||
1532 | } | ||
1533 | |||
1531 | /* First C function to be called on Xen boot */ | 1534 | /* First C function to be called on Xen boot */ |
1532 | asmlinkage __visible void __init xen_start_kernel(void) | 1535 | asmlinkage __visible void __init xen_start_kernel(void) |
1533 | { | 1536 | { |
@@ -1548,8 +1551,6 @@ asmlinkage __visible void __init xen_start_kernel(void) | |||
1548 | 1551 | ||
1549 | /* Install Xen paravirt ops */ | 1552 | /* Install Xen paravirt ops */ |
1550 | pv_info = xen_info; | 1553 | pv_info = xen_info; |
1551 | if (xen_initial_domain()) | ||
1552 | pv_info.features |= PV_SUPPORTED_RTC; | ||
1553 | pv_init_ops = xen_init_ops; | 1554 | pv_init_ops = xen_init_ops; |
1554 | if (!xen_pvh_domain()) { | 1555 | if (!xen_pvh_domain()) { |
1555 | pv_cpu_ops = xen_cpu_ops; | 1556 | pv_cpu_ops = xen_cpu_ops; |
@@ -1684,6 +1685,7 @@ asmlinkage __visible void __init xen_start_kernel(void) | |||
1684 | boot_params.hdr.ramdisk_image = initrd_start; | 1685 | boot_params.hdr.ramdisk_image = initrd_start; |
1685 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1686 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1686 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); | 1687 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); |
1688 | boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN; | ||
1687 | 1689 | ||
1688 | if (!xen_initial_domain()) { | 1690 | if (!xen_initial_domain()) { |
1689 | add_preferred_console("xenboot", 0, NULL); | 1691 | add_preferred_console("xenboot", 0, NULL); |
@@ -1701,6 +1703,8 @@ asmlinkage __visible void __init xen_start_kernel(void) | |||
1701 | .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, | 1703 | .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, |
1702 | }; | 1704 | }; |
1703 | 1705 | ||
1706 | x86_platform.set_legacy_features = | ||
1707 | xen_dom0_set_legacy_features; | ||
1704 | xen_init_vga(info, xen_start_info->console.dom0.info_size); | 1708 | xen_init_vga(info, xen_start_info->console.dom0.info_size); |
1705 | xen_start_info->console.domU.mfn = 0; | 1709 | xen_start_info->console.domU.mfn = 0; |
1706 | xen_start_info->console.domU.evtchn = 0; | 1710 | xen_start_info->console.domU.evtchn = 0; |