diff options
Diffstat (limited to 'arch/x86/power')
-rw-r--r-- | arch/x86/power/Makefile | 9 | ||||
-rw-r--r-- | arch/x86/power/hibernate_asm_64.S | 146 | ||||
-rw-r--r-- | arch/x86/power/suspend_64.c | 321 |
3 files changed, 474 insertions, 2 deletions
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index d764ec95006..8ce87fb4abb 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile | |||
@@ -1,2 +1,7 @@ | |||
1 | obj-$(CONFIG_PM) += cpu.o | 1 | ifeq ($(CONFIG_X86_64),y) |
2 | obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o | 2 | obj-$(CONFIG_PM) += suspend_64.o |
3 | obj-$(CONFIG_HIBERNATION) += hibernate_asm_64.o | ||
4 | else | ||
5 | obj-$(CONFIG_PM) += cpu.o | ||
6 | obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o | ||
7 | endif | ||
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S new file mode 100644 index 00000000000..1deb3244b99 --- /dev/null +++ b/arch/x86/power/hibernate_asm_64.S | |||
@@ -0,0 +1,146 @@ | |||
1 | /* | ||
2 | * Hibernation support for x86-64 | ||
3 | * | ||
4 | * Distribute under GPLv2. | ||
5 | * | ||
6 | * Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl> | ||
7 | * Copyright 2005 Andi Kleen <ak@suse.de> | ||
8 | * Copyright 2004 Pavel Machek <pavel@suse.cz> | ||
9 | * | ||
10 | * swsusp_arch_resume must not use any stack or any nonlocal variables while | ||
11 | * copying pages: | ||
12 | * | ||
13 | * It's rewriting one kernel image with another. What is stack in the "old" | ||
14 | * image could very well be a data page in the "new" image, and overwriting | ||
15 | * your own stack under you is a bad idea. | ||
16 | */ | ||
17 | |||
18 | .text | ||
19 | #include <linux/linkage.h> | ||
20 | #include <asm/segment.h> | ||
21 | #include <asm/page.h> | ||
22 | #include <asm/asm-offsets.h> | ||
23 | |||
/*
 * swsusp_arch_suspend - record the CPU state, then create the image.
 *
 * Stores %rsp, %rbp, %rsi, %rdi, %rbx, %rcx, %rdx, %r8-%r15 and the flags
 * register into saved_context, remembers the address of restore_registers
 * and the current %cr3 in restore_jump_address / restore_cr3, and finally
 * calls swsusp_save.  %rax is used as scratch and is not saved itself.
 * Nothing touches %rax between the call and the ret, so whatever
 * swsusp_save leaves there is handed back to the caller unchanged.
 */
24 | ENTRY(swsusp_arch_suspend) | ||
/* %rax = base of saved_context; the pt_regs_* offsets are presumably generated into asm-offsets.h */
25 | movq $saved_context, %rax | ||
26 | movq %rsp, pt_regs_sp(%rax) | ||
27 | movq %rbp, pt_regs_bp(%rax) | ||
28 | movq %rsi, pt_regs_si(%rax) | ||
29 | movq %rdi, pt_regs_di(%rax) | ||
30 | movq %rbx, pt_regs_bx(%rax) | ||
31 | movq %rcx, pt_regs_cx(%rax) | ||
32 | movq %rdx, pt_regs_dx(%rax) | ||
33 | movq %r8, pt_regs_r8(%rax) | ||
34 | movq %r9, pt_regs_r9(%rax) | ||
35 | movq %r10, pt_regs_r10(%rax) | ||
36 | movq %r11, pt_regs_r11(%rax) | ||
37 | movq %r12, pt_regs_r12(%rax) | ||
38 | movq %r13, pt_regs_r13(%rax) | ||
39 | movq %r14, pt_regs_r14(%rax) | ||
40 | movq %r15, pt_regs_r15(%rax) | ||
/* flags cannot be moved directly: push them and pop into the save slot */
41 | pushfq | ||
42 | popq pt_regs_flags(%rax) | ||
43 | |||
44 | /* save the address of restore_registers */ | ||
45 | movq $restore_registers, %rax | ||
46 | movq %rax, restore_jump_address(%rip) | ||
47 | /* save cr3 */ | ||
48 | movq %cr3, %rax | ||
49 | movq %rax, restore_cr3(%rip) | ||
50 | |||
51 | call swsusp_save | ||
52 | ret | ||
53 | |||
/*
 * restore_image - leave the boot kernel's address space and start the copy.
 *
 * Loads %cr3 from temp_level4_pgt (minus __PAGE_OFFSET), flushes the TLB by
 * toggling CR4.PGE around a %cr3 reload, and then jumps to the code at
 * relocated_restore_code.  This routine never returns to its caller.
 *
 * Register contract on leaving (consumed by core_restore_code and
 * restore_registers):
 *   %rax = restore_jump_address
 *   %rbx = restore_cr3
 *   %rdx = restore_pblist (head of the pbe list)
 */
54 | ENTRY(restore_image) | ||
55 | /* switch to temporary page tables */ | ||
/* temp_level4_pgt holds a kernel virtual address; subtracting __PAGE_OFFSET presumably yields the physical address %cr3 needs */
56 | movq $__PAGE_OFFSET, %rdx | ||
57 | movq temp_level4_pgt(%rip), %rax | ||
58 | subq %rdx, %rax | ||
59 | movq %rax, %cr3 | ||
60 | /* Flush TLB */ | ||
/* %rax keeps the full CR4 feature set, %rdx is the same value with bit 7 (PGE) cleared */
61 | movq mmu_cr4_features(%rip), %rax | ||
62 | movq %rax, %rdx | ||
63 | andq $~(1<<7), %rdx # PGE | ||
64 | movq %rdx, %cr4; # turn off PGE | ||
65 | movq %cr3, %rcx; # flush TLB | ||
66 | movq %rcx, %cr3; | ||
67 | movq %rax, %cr4; # turn PGE back on | ||
68 | |||
69 | /* prepare to jump to the image kernel */ | ||
70 | movq restore_jump_address(%rip), %rax | ||
71 | movq restore_cr3(%rip), %rbx | ||
72 | |||
73 | /* prepare to copy image data to their original locations */ | ||
74 | movq restore_pblist(%rip), %rdx | ||
75 | movq relocated_restore_code(%rip), %rcx | ||
76 | jmpq *%rcx | ||
77 | |||
78 | /* code below has been relocated to a safe page */ | ||
/*
 * core_restore_code - copy every saved page back to its original location.
 *
 * Expects %rdx to point at the first pbe and %rax to hold the address to
 * jump to once the list is exhausted (both set up by restore_image).  For
 * each pbe it copies PAGE_SIZE bytes from pbe_address to pbe_orig_address
 * and then follows pbe_next; a NULL %rdx ends the loop.  Clobbers %rsi,
 * %rdi and %rcx.  No stack and no memory operands other than the pbe
 * fields are used.
 *
 * swsusp_arch_resume() copies the bytes between core_restore_code and
 * restore_registers to a safe page, and restore_image jumps to that copy.
 */
79 | ENTRY(core_restore_code) | ||
80 | loop: | ||
81 | testq %rdx, %rdx | ||
82 | jz done | ||
83 | |||
84 | /* get addresses from the pbe and copy the page */ | ||
85 | movq pbe_address(%rdx), %rsi | ||
86 | movq pbe_orig_address(%rdx), %rdi | ||
/* count is in quadwords: PAGE_SIZE bytes / 8 */
87 | movq $(PAGE_SIZE >> 3), %rcx | ||
88 | rep | ||
89 | movsq | ||
90 | |||
91 | /* progress to the next pbe */ | ||
92 | movq pbe_next(%rdx), %rdx | ||
93 | jmp loop | ||
94 | done: | ||
95 | /* jump to the restore_registers address from the image header */ | ||
96 | jmpq *%rax | ||
97 | /* | ||
98 | * NOTE: This assumes that the boot kernel's text mapping covers the | ||
99 | * image kernel's page containing restore_registers and the address of | ||
100 | * this page is the same as in the image kernel's text mapping (it | ||
101 | * should always be true, because the text mapping is linear, starting | ||
102 | * from 0, and is supposed to cover the entire kernel text for every | ||
103 | * kernel). | ||
104 | * | ||
105 | * code below belongs to the image kernel | ||
106 | */ | ||
107 | |||
/*
 * restore_registers - first code of the image kernel to run after the copy.
 *
 * Reached through the indirect jump at the end of core_restore_code, with
 * %rbx still holding the %cr3 value that restore_image loaded from
 * restore_cr3.  Switches back to those page tables, flushes the TLB
 * (toggling CR4.PGE so global entries go too), reloads the registers and
 * flags recorded in saved_context by swsusp_arch_suspend, clears %rax,
 * stores it to in_suspend and returns.  Because %rsp has been reloaded from
 * saved_context, the ret uses the stack pointer that swsusp_arch_suspend
 * recorded on entry.
 */
108 | ENTRY(restore_registers) | ||
109 | /* go back to the original page tables */ | ||
110 | movq %rbx, %cr3 | ||
111 | |||
112 | /* Flush TLB, including "global" things (vmalloc) */ | ||
113 | movq mmu_cr4_features(%rip), %rax | ||
114 | movq %rax, %rdx | ||
115 | andq $~(1<<7), %rdx; # PGE | ||
116 | movq %rdx, %cr4; # turn off PGE | ||
117 | movq %cr3, %rcx; # flush TLB | ||
118 | movq %rcx, %cr3 | ||
119 | movq %rax, %cr4; # turn PGE back on | ||
120 | |||
121 | /* We don't restore %rax, it must be 0 anyway */ | ||
122 | movq $saved_context, %rax | ||
123 | movq pt_regs_sp(%rax), %rsp | ||
124 | movq pt_regs_bp(%rax), %rbp | ||
125 | movq pt_regs_si(%rax), %rsi | ||
126 | movq pt_regs_di(%rax), %rdi | ||
127 | movq pt_regs_bx(%rax), %rbx | ||
128 | movq pt_regs_cx(%rax), %rcx | ||
129 | movq pt_regs_dx(%rax), %rdx | ||
130 | movq pt_regs_r8(%rax), %r8 | ||
131 | movq pt_regs_r9(%rax), %r9 | ||
132 | movq pt_regs_r10(%rax), %r10 | ||
133 | movq pt_regs_r11(%rax), %r11 | ||
134 | movq pt_regs_r12(%rax), %r12 | ||
135 | movq pt_regs_r13(%rax), %r13 | ||
136 | movq pt_regs_r14(%rax), %r14 | ||
137 | movq pt_regs_r15(%rax), %r15 | ||
/* restore the flags last, via the stack that was just switched back */
138 | pushq pt_regs_flags(%rax) | ||
139 | popfq | ||
140 | |||
/* %rax = 0 is both the value written to in_suspend and what the ret leaves in %rax */
141 | xorq %rax, %rax | ||
142 | |||
143 | /* tell the hibernation core that we've just restored the memory */ | ||
/* NOTE(review): this is a 64-bit store; in_suspend's declared width is not visible here - confirm it */
144 | movq %rax, in_suspend(%rip) | ||
145 | |||
146 | ret | ||
diff --git a/arch/x86/power/suspend_64.c b/arch/x86/power/suspend_64.c new file mode 100644 index 00000000000..d51dbf21d02 --- /dev/null +++ b/arch/x86/power/suspend_64.c | |||
@@ -0,0 +1,321 @@ | |||
1 | /* | ||
2 | * Suspend and hibernation support for x86-64 | ||
3 | * | ||
4 | * Distribute under GPLv2 | ||
5 | * | ||
6 | * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> | ||
7 | * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> | ||
8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> | ||
9 | */ | ||
10 | |||
11 | #include <linux/smp.h> | ||
12 | #include <linux/suspend.h> | ||
13 | #include <asm/proto.h> | ||
14 | #include <asm/page.h> | ||
15 | #include <asm/pgtable.h> | ||
16 | #include <asm/mtrr.h> | ||
17 | |||
18 | /* References to section boundaries */ | ||
19 | extern const void __nosave_begin, __nosave_end; | ||
20 | |||
21 | static void fix_processor_context(void); | ||
22 | |||
23 | struct saved_context saved_context; | ||
24 | |||
25 | /** | ||
26 | * __save_processor_state - save CPU registers before creating a | ||
27 | * hibernation image and before restoring the memory state from it | ||
28 | * @ctxt - structure to store the registers contents in | ||
29 | * | ||
30 | * NOTE: If there is a CPU register the modification of which by the | ||
31 | * boot kernel (ie. the kernel used for loading the hibernation image) | ||
32 | * might affect the operations of the restored target kernel (ie. the one | ||
33 | * saved in the hibernation image), then its contents must be saved by this | ||
34 | * function. In other words, if kernel A is hibernated and different | ||
35 | * kernel B is used for loading the hibernation image into memory, the | ||
36 | * kernel A's __save_processor_state() function must save all registers | ||
37 | * needed by kernel A, so that it can operate correctly after the resume | ||
38 | * regardless of what kernel B does in the meantime. | ||
39 | */ | ||
40 | static void __save_processor_state(struct saved_context *ctxt) | ||
41 | { | ||
42 | kernel_fpu_begin(); | ||
43 | |||
44 | /* | ||
45 | * descriptor tables | ||
46 | */ | ||
47 | store_gdt((struct desc_ptr *)&ctxt->gdt_limit); | ||
48 | store_idt((struct desc_ptr *)&ctxt->idt_limit); | ||
49 | store_tr(ctxt->tr); | ||
50 | |||
51 | /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ | ||
52 | /* | ||
53 | * segment registers | ||
54 | */ | ||
55 | asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); | ||
56 | asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); | ||
57 | asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); | ||
58 | asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); | ||
59 | asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); | ||
60 | |||
61 | rdmsrl(MSR_FS_BASE, ctxt->fs_base); | ||
62 | rdmsrl(MSR_GS_BASE, ctxt->gs_base); | ||
63 | rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); | ||
64 | mtrr_save_fixed_ranges(NULL); | ||
65 | |||
66 | /* | ||
67 | * control registers | ||
68 | */ | ||
69 | rdmsrl(MSR_EFER, ctxt->efer); | ||
70 | ctxt->cr0 = read_cr0(); | ||
71 | ctxt->cr2 = read_cr2(); | ||
72 | ctxt->cr3 = read_cr3(); | ||
73 | ctxt->cr4 = read_cr4(); | ||
74 | ctxt->cr8 = read_cr8(); | ||
75 | } | ||
76 | |||
77 | void save_processor_state(void) | ||
78 | { | ||
79 | __save_processor_state(&saved_context); | ||
80 | } | ||
81 | |||
/*
 * Counterpart of the kernel_fpu_begin() call made while saving the
 * processor state: restores the FPU registers if necessary.
 */
static void do_fpu_end(void)
{
	kernel_fpu_end();
}
89 | |||
90 | /** | ||
91 | * __restore_processor_state - restore the contents of CPU registers saved | ||
92 | * by __save_processor_state() | ||
93 | * @ctxt - structure to load the registers contents from | ||
94 | */ | ||
95 | static void __restore_processor_state(struct saved_context *ctxt) | ||
96 | { | ||
97 | /* | ||
98 | * control registers | ||
99 | */ | ||
100 | wrmsrl(MSR_EFER, ctxt->efer); | ||
101 | write_cr8(ctxt->cr8); | ||
102 | write_cr4(ctxt->cr4); | ||
103 | write_cr3(ctxt->cr3); | ||
104 | write_cr2(ctxt->cr2); | ||
105 | write_cr0(ctxt->cr0); | ||
106 | |||
107 | /* | ||
108 | * now restore the descriptor tables to their proper values | ||
109 | * ltr is done i fix_processor_context(). | ||
110 | */ | ||
111 | load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); | ||
112 | load_idt((const struct desc_ptr *)&ctxt->idt_limit); | ||
113 | |||
114 | |||
115 | /* | ||
116 | * segment registers | ||
117 | */ | ||
118 | asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); | ||
119 | asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); | ||
120 | asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); | ||
121 | load_gs_index(ctxt->gs); | ||
122 | asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); | ||
123 | |||
124 | wrmsrl(MSR_FS_BASE, ctxt->fs_base); | ||
125 | wrmsrl(MSR_GS_BASE, ctxt->gs_base); | ||
126 | wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); | ||
127 | |||
128 | fix_processor_context(); | ||
129 | |||
130 | do_fpu_end(); | ||
131 | mtrr_ap_init(); | ||
132 | } | ||
133 | |||
134 | void restore_processor_state(void) | ||
135 | { | ||
136 | __restore_processor_state(&saved_context); | ||
137 | } | ||
138 | |||
139 | static void fix_processor_context(void) | ||
140 | { | ||
141 | int cpu = smp_processor_id(); | ||
142 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
143 | |||
144 | /* | ||
145 | * This just modifies memory; should not be necessary. But... This | ||
146 | * is necessary, because 386 hardware has concept of busy TSS or some | ||
147 | * similar stupidity. | ||
148 | */ | ||
149 | set_tss_desc(cpu, t); | ||
150 | |||
151 | get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; | ||
152 | |||
153 | syscall_init(); /* This sets MSR_*STAR and related */ | ||
154 | load_TR_desc(); /* This does ltr */ | ||
155 | load_LDT(¤t->active_mm->context); /* This does lldt */ | ||
156 | |||
157 | /* | ||
158 | * Now maybe reload the debug registers | ||
159 | */ | ||
160 | if (current->thread.debugreg7){ | ||
161 | loaddebug(¤t->thread, 0); | ||
162 | loaddebug(¤t->thread, 1); | ||
163 | loaddebug(¤t->thread, 2); | ||
164 | loaddebug(¤t->thread, 3); | ||
165 | /* no 4 and 5 */ | ||
166 | loaddebug(¤t->thread, 6); | ||
167 | loaddebug(¤t->thread, 7); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_HIBERNATION | ||
172 | /* Defined in arch/x86/power/hibernate_asm_64.S */ | ||
173 | extern int restore_image(void); | ||
174 | |||
175 | /* | ||
176 | * Address to jump to in the last phase of restore in order to get to the image | ||
177 | * kernel's text (this value is passed in the image header). | ||
178 | */ | ||
179 | unsigned long restore_jump_address; | ||
180 | |||
181 | /* | ||
182 | * Value of the cr3 register from before the hibernation (this value is passed | ||
183 | * in the image header). | ||
184 | */ | ||
185 | unsigned long restore_cr3; | ||
186 | |||
/*
 * Top-level page table of the temporary mappings.  Allocated and filled in
 * by set_up_temporary_mappings(); restore_image loads it into %cr3 before
 * the image pages are copied back.
 */
187 | pgd_t *temp_level4_pgt; | ||
188 | |||
/*
 * Safe page that receives a copy of core_restore_code in
 * swsusp_arch_resume(); restore_image jumps to this address.
 */
189 | void *relocated_restore_code; | ||
190 | |||
191 | static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | ||
192 | { | ||
193 | long i, j; | ||
194 | |||
195 | i = pud_index(address); | ||
196 | pud = pud + i; | ||
197 | for (; i < PTRS_PER_PUD; pud++, i++) { | ||
198 | unsigned long paddr; | ||
199 | pmd_t *pmd; | ||
200 | |||
201 | paddr = address + i*PUD_SIZE; | ||
202 | if (paddr >= end) | ||
203 | break; | ||
204 | |||
205 | pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); | ||
206 | if (!pmd) | ||
207 | return -ENOMEM; | ||
208 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
209 | for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { | ||
210 | unsigned long pe; | ||
211 | |||
212 | if (paddr >= end) | ||
213 | break; | ||
214 | pe = __PAGE_KERNEL_LARGE_EXEC | paddr; | ||
215 | pe &= __supported_pte_mask; | ||
216 | set_pmd(pmd, __pmd(pe)); | ||
217 | } | ||
218 | } | ||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | static int set_up_temporary_mappings(void) | ||
223 | { | ||
224 | unsigned long start, end, next; | ||
225 | int error; | ||
226 | |||
227 | temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); | ||
228 | if (!temp_level4_pgt) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | /* It is safe to reuse the original kernel mapping */ | ||
232 | set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), | ||
233 | init_level4_pgt[pgd_index(__START_KERNEL_map)]); | ||
234 | |||
235 | /* Set up the direct mapping from scratch */ | ||
236 | start = (unsigned long)pfn_to_kaddr(0); | ||
237 | end = (unsigned long)pfn_to_kaddr(end_pfn); | ||
238 | |||
239 | for (; start < end; start = next) { | ||
240 | pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); | ||
241 | if (!pud) | ||
242 | return -ENOMEM; | ||
243 | next = start + PGDIR_SIZE; | ||
244 | if (next > end) | ||
245 | next = end; | ||
246 | if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) | ||
247 | return error; | ||
248 | set_pgd(temp_level4_pgt + pgd_index(start), | ||
249 | mk_kernel_pgd(__pa(pud))); | ||
250 | } | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | int swsusp_arch_resume(void) | ||
255 | { | ||
256 | int error; | ||
257 | |||
258 | /* We have got enough memory and from now on we cannot recover */ | ||
259 | if ((error = set_up_temporary_mappings())) | ||
260 | return error; | ||
261 | |||
262 | relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); | ||
263 | if (!relocated_restore_code) | ||
264 | return -ENOMEM; | ||
265 | memcpy(relocated_restore_code, &core_restore_code, | ||
266 | &restore_registers - &core_restore_code); | ||
267 | |||
268 | restore_image(); | ||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | /* | ||
273 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | ||
274 | */ | ||
275 | |||
276 | int pfn_is_nosave(unsigned long pfn) | ||
277 | { | ||
278 | unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; | ||
279 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; | ||
280 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | ||
281 | } | ||
282 | |||
/*
 * Architecture-specific part of the hibernation image header, written by
 * arch_hibernation_header_save() and read back by
 * arch_hibernation_header_restore().
 */
struct restore_data_record {
	unsigned long jump_address;	/* copy of restore_jump_address */
	unsigned long cr3;		/* copy of restore_cr3 */
	unsigned long magic;		/* RESTORE_MAGIC when the record is valid */
};

/* Marker stored in restore_data_record.magic and checked on restore. */
#define RESTORE_MAGIC	0x0123456789ABCDEFUL
290 | |||
291 | /** | ||
292 | * arch_hibernation_header_save - populate the architecture specific part | ||
293 | * of a hibernation image header | ||
294 | * @addr: address to save the data at | ||
295 | */ | ||
296 | int arch_hibernation_header_save(void *addr, unsigned int max_size) | ||
297 | { | ||
298 | struct restore_data_record *rdr = addr; | ||
299 | |||
300 | if (max_size < sizeof(struct restore_data_record)) | ||
301 | return -EOVERFLOW; | ||
302 | rdr->jump_address = restore_jump_address; | ||
303 | rdr->cr3 = restore_cr3; | ||
304 | rdr->magic = RESTORE_MAGIC; | ||
305 | return 0; | ||
306 | } | ||
307 | |||
308 | /** | ||
309 | * arch_hibernation_header_restore - read the architecture specific data | ||
310 | * from the hibernation image header | ||
311 | * @addr: address to read the data from | ||
312 | */ | ||
313 | int arch_hibernation_header_restore(void *addr) | ||
314 | { | ||
315 | struct restore_data_record *rdr = addr; | ||
316 | |||
317 | restore_jump_address = rdr->jump_address; | ||
318 | restore_cr3 = rdr->cr3; | ||
319 | return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; | ||
320 | } | ||
321 | #endif /* CONFIG_HIBERNATION */ | ||