diff options
author | Rafael J. Wysocki <rjw@sisk.pl> | 2007-10-18 06:04:53 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-18 17:37:19 -0400 |
commit | d158cbdf39ffaec9dd5299fdfdfdd2c7897a71dc (patch) | |
tree | 1a03042426b7469c06403a5e5615bc81d3ad1d1a /arch/x86/kernel | |
parent | d307c4a8e826c44f9633bd3f7e60d0491e7d885a (diff) |
Hibernation: Arbitrary boot kernel support on x86_64
Make it possible to restore a hibernation image on x86_64 with the help of a
kernel different from the one in the image.
The idea is to split the core restoration code into two separate parts and to
place each of them in a different page. The first part belongs to the boot
kernel and is executed as the last step of the image kernel's memory
restoration procedure. Before being executed, it is relocated to a safe page
that won't be overwritten while copying the image kernel pages.
The final operation performed by it is a jump to the second part of the core
restoration code that belongs to the image kernel and has just been restored.
This code makes the CPU switch to the image kernel's page tables and restores
the state of general purpose registers (including the stack pointer) from
before the hibernation.
The main issue with this idea is that in order to jump to the second part of
the core restoration code the boot kernel needs to know its address.
However, this address may be passed to it in the image header. Namely, the
part of the image header previously used for checking if the version of the
image kernel is correct can be replaced with some architecture specific data
that will allow the boot kernel to jump to the right address within the image
kernel. These data should also be used for checking if the image kernel is
compatible with the boot kernel (as far as the memory restoration procedure
is concerned). It can be done, for example, with the help of a "magic" value
that has to be equal in both kernels, so that they can be regarded as
compatible.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/suspend_64.c | 54 | ||||
-rw-r--r-- | arch/x86/kernel/suspend_asm_64.S | 41 |
2 files changed, 87 insertions, 8 deletions
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index 573c0a6e0ac6..01fbfb018ca9 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c | |||
@@ -150,8 +150,16 @@ void fix_processor_context(void) | |||
150 | /* Defined in arch/x86_64/kernel/suspend_asm.S */ | 150 | /* Defined in arch/x86_64/kernel/suspend_asm.S */ |
151 | extern int restore_image(void); | 151 | extern int restore_image(void); |
152 | 152 | ||
153 | /* | ||
154 | * Address to jump to in the last phase of restore in order to get to the image | ||
155 | * kernel's text (this value is passed in the image header). | ||
156 | */ | ||
157 | unsigned long restore_jump_address; | ||
158 | |||
153 | pgd_t *temp_level4_pgt; | 159 | pgd_t *temp_level4_pgt; |
154 | 160 | ||
161 | void *relocated_restore_code; | ||
162 | |||
155 | static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | 163 | static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) |
156 | { | 164 | { |
157 | long i, j; | 165 | long i, j; |
@@ -175,7 +183,7 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en | |||
175 | 183 | ||
176 | if (paddr >= end) | 184 | if (paddr >= end) |
177 | break; | 185 | break; |
178 | pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr; | 186 | pe = __PAGE_KERNEL_LARGE_EXEC | paddr; |
179 | pe &= __supported_pte_mask; | 187 | pe &= __supported_pte_mask; |
180 | set_pmd(pmd, __pmd(pe)); | 188 | set_pmd(pmd, __pmd(pe)); |
181 | } | 189 | } |
@@ -222,6 +230,13 @@ int swsusp_arch_resume(void) | |||
222 | /* We have got enough memory and from now on we cannot recover */ | 230 | /* We have got enough memory and from now on we cannot recover */ |
223 | if ((error = set_up_temporary_mappings())) | 231 | if ((error = set_up_temporary_mappings())) |
224 | return error; | 232 | return error; |
233 | |||
234 | relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); | ||
235 | if (!relocated_restore_code) | ||
236 | return -ENOMEM; | ||
237 | memcpy(relocated_restore_code, &core_restore_code, | ||
238 | &restore_registers - &core_restore_code); | ||
239 | |||
225 | restore_image(); | 240 | restore_image(); |
226 | return 0; | 241 | return 0; |
227 | } | 242 | } |
@@ -236,4 +251,41 @@ int pfn_is_nosave(unsigned long pfn) | |||
236 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; | 251 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; |
237 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | 252 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); |
238 | } | 253 | } |
254 | |||
255 | struct restore_data_record { | ||
256 | unsigned long jump_address; | ||
257 | unsigned long control; | ||
258 | }; | ||
259 | |||
260 | #define RESTORE_MAGIC 0x0123456789ABCDEFUL | ||
261 | |||
262 | /** | ||
263 | * arch_hibernation_header_save - populate the architecture specific part | ||
264 | * of a hibernation image header | ||
265 | * @addr: address to save the data at | ||
266 | */ | ||
267 | int arch_hibernation_header_save(void *addr, unsigned int max_size) | ||
268 | { | ||
269 | struct restore_data_record *rdr = addr; | ||
270 | |||
271 | if (max_size < sizeof(struct restore_data_record)) | ||
272 | return -EOVERFLOW; | ||
273 | rdr->jump_address = restore_jump_address; | ||
274 | rdr->control = (restore_jump_address ^ RESTORE_MAGIC); | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | /** | ||
279 | * arch_hibernation_header_restore - read the architecture specific data | ||
280 | * from the hibernation image header | ||
281 | * @addr: address to read the data from | ||
282 | */ | ||
283 | int arch_hibernation_header_restore(void *addr) | ||
284 | { | ||
285 | struct restore_data_record *rdr = addr; | ||
286 | |||
287 | restore_jump_address = rdr->jump_address; | ||
288 | return (rdr->control == (restore_jump_address ^ RESTORE_MAGIC)) ? | ||
289 | 0 : -EINVAL; | ||
290 | } | ||
239 | #endif /* CONFIG_HIBERNATION */ | 291 | #endif /* CONFIG_HIBERNATION */ |
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S index 16d183f67bc1..40a209e0525c 100644 --- a/arch/x86/kernel/suspend_asm_64.S +++ b/arch/x86/kernel/suspend_asm_64.S | |||
@@ -2,8 +2,8 @@ | |||
2 | * | 2 | * |
3 | * Distribute under GPLv2. | 3 | * Distribute under GPLv2. |
4 | * | 4 | * |
5 | * swsusp_arch_resume may not use any stack, nor any variable that is | 5 | * swsusp_arch_resume must not use any stack or any nonlocal variables while |
6 | * not "NoSave" during copying pages: | 6 | * copying pages: |
7 | * | 7 | * |
8 | * Its rewriting one kernel image with another. What is stack in "old" | 8 | * Its rewriting one kernel image with another. What is stack in "old" |
9 | * image could very well be data page in "new" image, and overwriting | 9 | * image could very well be data page in "new" image, and overwriting |
@@ -36,6 +36,10 @@ ENTRY(swsusp_arch_suspend) | |||
36 | movq %r15, saved_context_r15(%rip) | 36 | movq %r15, saved_context_r15(%rip) |
37 | pushfq ; popq saved_context_eflags(%rip) | 37 | pushfq ; popq saved_context_eflags(%rip) |
38 | 38 | ||
39 | /* save the address of restore_registers */ | ||
40 | movq $restore_registers, %rax | ||
41 | movq %rax, restore_jump_address(%rip) | ||
42 | |||
39 | call swsusp_save | 43 | call swsusp_save |
40 | ret | 44 | ret |
41 | 45 | ||
@@ -54,7 +58,16 @@ ENTRY(restore_image) | |||
54 | movq %rcx, %cr3; | 58 | movq %rcx, %cr3; |
55 | movq %rax, %cr4; # turn PGE back on | 59 | movq %rax, %cr4; # turn PGE back on |
56 | 60 | ||
61 | /* prepare to jump to the image kernel */ | ||
62 | movq restore_jump_address(%rip), %rax | ||
63 | |||
64 | /* prepare to copy image data to their original locations */ | ||
57 | movq restore_pblist(%rip), %rdx | 65 | movq restore_pblist(%rip), %rdx |
66 | movq relocated_restore_code(%rip), %rcx | ||
67 | jmpq *%rcx | ||
68 | |||
69 | /* code below has been relocated to a safe page */ | ||
70 | ENTRY(core_restore_code) | ||
58 | loop: | 71 | loop: |
59 | testq %rdx, %rdx | 72 | testq %rdx, %rdx |
60 | jz done | 73 | jz done |
@@ -62,7 +75,7 @@ loop: | |||
62 | /* get addresses from the pbe and copy the page */ | 75 | /* get addresses from the pbe and copy the page */ |
63 | movq pbe_address(%rdx), %rsi | 76 | movq pbe_address(%rdx), %rsi |
64 | movq pbe_orig_address(%rdx), %rdi | 77 | movq pbe_orig_address(%rdx), %rdi |
65 | movq $512, %rcx | 78 | movq $(PAGE_SIZE >> 3), %rcx |
66 | rep | 79 | rep |
67 | movsq | 80 | movsq |
68 | 81 | ||
@@ -70,6 +83,20 @@ loop: | |||
70 | movq pbe_next(%rdx), %rdx | 83 | movq pbe_next(%rdx), %rdx |
71 | jmp loop | 84 | jmp loop |
72 | done: | 85 | done: |
86 | /* jump to the restore_registers address from the image header */ | ||
87 | jmpq *%rax | ||
88 | /* | ||
89 | * NOTE: This assumes that the boot kernel's text mapping covers the | ||
90 | * image kernel's page containing restore_registers and the address of | ||
91 | * this page is the same as in the image kernel's text mapping (it | ||
92 | * should always be true, because the text mapping is linear, starting | ||
93 | * from 0, and is supposed to cover the entire kernel text for every | ||
94 | * kernel). | ||
95 | * | ||
96 | * code below belongs to the image kernel | ||
97 | */ | ||
98 | |||
99 | ENTRY(restore_registers) | ||
73 | /* go back to the original page tables */ | 100 | /* go back to the original page tables */ |
74 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | 101 | movq $(init_level4_pgt - __START_KERNEL_map), %rax |
75 | addq phys_base(%rip), %rax | 102 | addq phys_base(%rip), %rax |
@@ -84,12 +111,9 @@ done: | |||
84 | movq %rcx, %cr3 | 111 | movq %rcx, %cr3 |
85 | movq %rax, %cr4; # turn PGE back on | 112 | movq %rax, %cr4; # turn PGE back on |
86 | 113 | ||
87 | movl $24, %eax | ||
88 | movl %eax, %ds | ||
89 | |||
90 | movq saved_context_esp(%rip), %rsp | 114 | movq saved_context_esp(%rip), %rsp |
91 | movq saved_context_ebp(%rip), %rbp | 115 | movq saved_context_ebp(%rip), %rbp |
92 | /* Don't restore %rax, it must be 0 anyway */ | 116 | /* restore GPRs (we don't restore %rax, it must be 0 anyway) */ |
93 | movq saved_context_ebx(%rip), %rbx | 117 | movq saved_context_ebx(%rip), %rbx |
94 | movq saved_context_ecx(%rip), %rcx | 118 | movq saved_context_ecx(%rip), %rcx |
95 | movq saved_context_edx(%rip), %rdx | 119 | movq saved_context_edx(%rip), %rdx |
@@ -107,4 +131,7 @@ done: | |||
107 | 131 | ||
108 | xorq %rax, %rax | 132 | xorq %rax, %rax |
109 | 133 | ||
134 | /* tell the hibernation core that we've just restored the memory */ | ||
135 | movq %rax, in_suspend(%rip) | ||
136 | |||
110 | ret | 137 | ret |