Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig.debug                                              6
-rw-r--r--  arch/x86/Makefile                                                   4
-rw-r--r--  arch/x86/boot/printf.c                                             24
-rw-r--r--  arch/x86/configs/i386_defconfig                                     1
-rw-r--r--  arch/x86/configs/x86_64_defconfig                                   1
-rw-r--r--  arch/x86/kernel/Makefile                                            2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c                            2
-rw-r--r--  arch/x86/kernel/entry_32.S                                         15
-rw-r--r--  arch/x86/kernel/entry_64.S                                         18
-rw-r--r--  arch/x86/kernel/geode_32.c                                          5
-rw-r--r--  arch/x86/kernel/head_32.S                                         151
-rw-r--r--  arch/x86/kernel/mfgpt_32.c                                        123
-rw-r--r--  arch/x86/kernel/setup_32.c                                          4
-rw-r--r--  arch/x86/kernel/topology.c                                          2
-rw-r--r--  arch/x86/mm/init_32.c                                              74
-rw-r--r--  arch/x86/mm/init_64.c                                               8
-rw-r--r--  arch/x86/mm/ioremap.c                                              55
-rw-r--r--  arch/x86/mm/pageattr.c                                            140
-rw-r--r--  arch/x86/power/Makefile                                             4
-rw-r--r--  arch/x86/power/cpu_32.c (renamed from arch/x86/power/cpu.c)         2
-rw-r--r--  arch/x86/power/cpu_64.c (renamed from arch/x86/kernel/suspend_64.c)  160
-rw-r--r--  arch/x86/power/hibernate_32.c (renamed from arch/x86/power/suspend.c)  6
-rw-r--r--  arch/x86/power/hibernate_64.c                                     169
-rw-r--r--  arch/x86/power/hibernate_asm_32.S (renamed from arch/x86/power/swsusp.S)  3
-rw-r--r--  arch/x86/power/hibernate_asm_64.S (renamed from arch/x86/kernel/suspend_asm_64.S)  9
-rw-r--r--  arch/x86/xen/mmu.c                                                  6
-rw-r--r--  arch/x86/xen/time.c                                                10
27 files changed, 609 insertions(+), 395 deletions(-)
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fa555148823d..864affc9a7b0 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -34,13 +34,9 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
-comment "Page alloc debug is incompatible with Software Suspend on i386"
-	depends on DEBUG_KERNEL && HIBERNATION
-	depends on X86_32
-
 config DEBUG_PAGEALLOC
 	bool "Debug page memory allocations"
-	depends on DEBUG_KERNEL && X86_32
+	depends on DEBUG_KERNEL
 	help
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 364865b1b08d..204af43535c5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -191,8 +191,10 @@ drivers-$(CONFIG_PCI) += arch/x86/pci/
 # must be linked after kernel/
 drivers-$(CONFIG_OPROFILE)		+= arch/x86/oprofile/
 
-ifeq ($(CONFIG_X86_32),y)
+# suspend and hibernation support
 drivers-$(CONFIG_PM)			+= arch/x86/power/
+
+ifeq ($(CONFIG_X86_32),y)
 drivers-$(CONFIG_FB)			+= arch/x86/video/
 endif
 
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 1a09f9309d3c..7e7e890699be 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -33,8 +33,8 @@ static int skip_atoi(const char **s)
 #define PLUS	4		/* show plus */
 #define SPACE	8		/* space if plus */
 #define LEFT	16		/* left justified */
-#define SPECIAL	32		/* 0x */
-#define LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+#define SMALL	32		/* Must be 32 == 0x20 */
+#define SPECIAL	64		/* 0x */
 
 #define do_div(n,base) ({ \
 int __res; \
@@ -45,12 +45,16 @@ __res; })
 static char *number(char *str, long num, int base, int size, int precision,
 		    int type)
 {
-	char c, sign, tmp[66];
-	const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+	/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+	char tmp[66];
+	char c, sign, locase;
 	int i;
 
-	if (type & LARGE)
-		digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
+	 * produces same digits or (maybe lowercased) letters */
+	locase = (type & SMALL);
 	if (type & LEFT)
 		type &= ~ZEROPAD;
 	if (base < 2 || base > 36)
@@ -81,7 +85,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 		tmp[i++] = '0';
 	else
 		while (num != 0)
-			tmp[i++] = digits[do_div(num, base)];
+			tmp[i++] = (digits[do_div(num, base)] | locase);
 	if (i > precision)
 		precision = i;
 	size -= precision;
@@ -95,7 +99,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 			*str++ = '0';
 		else if (base == 16) {
 			*str++ = '0';
-			*str++ = digits[33];
+			*str++ = ('X' | locase);
 		}
 	}
 	if (!(type & LEFT))
@@ -244,9 +248,9 @@ int vsprintf(char *buf, const char *fmt, va_list args)
 			base = 8;
 			break;
 
-		case 'X':
-			flags |= LARGE;
 		case 'x':
+			flags |= SMALL;
+		case 'X':
 			base = 16;
 			break;
 
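
The SMALL/locase rewrite above leans on ASCII layout: uppercase letters differ from their lowercase forms only in bit 0x20, and ORing a decimal digit with 0x20 leaves it unchanged. A minimal standalone sketch of the trick (hypothetical demo code, not part of the patch):

#include <stdio.h>

#define SMALL 32	/* must be 32 == 0x20, the ASCII case bit */

int main(void)
{
	static const char digits[16] = "0123456789ABCDEF";
	int type = SMALL;		/* as set for %x; 0 for %X */
	char locase = (type & SMALL);	/* 0x20 or 0 */

	/* 'A' | 0x20 == 'a', while '7' | 0x20 is still '7' */
	printf("%c%c\n", digits[10] | locase, digits[7] | locase);	/* prints "a7" */
	printf("%c\n", 'X' | locase);	/* prints "x", used for the 0x prefix */
	return 0;
}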
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 77562e7cdab6..3df340b54e57 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9e2b0ef851de..eef98cb00c62 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 21dc1a061bf1..76ec0f8f138a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -84,8 +84,6 @@ ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
-        obj-$(CONFIG_PM)		+= suspend_64.o
-        obj-$(CONFIG_HIBERNATION)	+= suspend_asm_64.o
 
         obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 24885be5c48c..9b7e01daa1ca 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -118,7 +118,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
 
 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
 {
-	return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
 }
 
 /* Mutex protecting device creation against CPU hotplug */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index be5c31d04884..824e21b80aad 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,8 @@ restore_nocheck_notrace:
 	RESTORE_REGS
 	addl $4, %esp			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
-1:	INTERRUPT_RETURN
+ENTRY(irq_return)
+	INTERRUPT_RETURN
 .section .fixup,"ax"
 iret_exc:
 	pushl $0			# no error code
@@ -418,7 +419,7 @@ iret_exc:
 .previous
 .section __ex_table,"a"
 	.align 4
-	.long 1b,iret_exc
+	.long irq_return,iret_exc
 .previous
 
 	CFI_RESTORE_STATE
@@ -865,20 +866,16 @@ nmi_espfix_stack:
 	RESTORE_REGS
 	lss 12+4(%esp), %esp		# back to espfix stack
 	CFI_ADJUST_CFA_OFFSET -24
-1:	INTERRUPT_RETURN
+	jmp irq_return
 	CFI_ENDPROC
-.section __ex_table,"a"
-	.align 4
-	.long 1b,iret_exc
-.previous
 KPROBE_END(nmi)
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
-1:	iret
+	iret
 .section __ex_table,"a"
 	.align 4
-	.long 1b,iret_exc
+	.long native_iret, iret_exc
 .previous
 END(native_iret)
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7341e81941c..6be39a387c5a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -581,16 +581,24 @@ retint_restore_args: /* return to kernel space */
 	 */
 	TRACE_IRQS_IRETQ
 restore_args:
 	RESTORE_ARGS 0,8,0
-#ifdef CONFIG_PARAVIRT
+
+ENTRY(irq_return)
 	INTERRUPT_RETURN
-#endif
+
+	.section __ex_table, "a"
+	.quad irq_return, bad_iret
+	.previous
+
+#ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
 	iretq
 
 	.section __ex_table,"a"
 	.quad native_iret, bad_iret
 	.previous
+#endif
+
 	.section .fixup,"ax"
 bad_iret:
 	/*
@@ -804,7 +812,7 @@ paranoid_swapgs\trace:
 	SWAPGS_UNSAFE_STACK
 paranoid_restore\trace:
 	RESTORE_ALL 8
-	INTERRUPT_RETURN
+	jmp irq_return
 paranoid_userspace\trace:
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%ebx
@@ -919,7 +927,7 @@ error_kernelspace:
 	   iret run with kernel gs again, so don't set the user space flag.
 	   B stepping K8s sometimes report an truncated RIP for IRET
 	   exceptions returning to compat mode. Check for these here too. */
-	leaq native_iret(%rip),%rbp
+	leaq irq_return(%rip),%rbp
 	cmpq %rbp,RIP(%rsp)
 	je   error_swapgs
 	movl %ebp,%ebp	/* zero extend */
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index 9c7f7d395968..9dad6ca6cd70 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -163,14 +163,11 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
 
 static int __init geode_southbridge_init(void)
 {
-	int timers;
-
 	if (!is_geode())
 		return -ENODEV;
 
 	init_lbars();
-	timers = geode_mfgpt_detect();
-	printk(KERN_INFO "geode: %d MFGPT timers available.\n", timers);
+	(void) mfgpt_timer_setup();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5d8c5730686b..74ef4a41f224 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,10 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
+#include <asm/processor-flags.h>
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
 
 /*
  * References to members of the new_cpu_data structure.
@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
  */
 .section .text.head,"ax",@progbits
 ENTRY(startup_32)
-	/* check to see if KEEP_SEGMENTS flag is meaningful */
-	cmpw $0x207, BP_version(%esi)
-	jb 1f
-
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 	   us to not reload segments */
 	testb $(1<<6), BP_loadflags(%esi)
@@ -92,7 +92,7 @@ ENTRY(startup_32)
 /*
  * Set segments to known values.
  */
-1:	lgdt boot_gdt_descr - __PAGE_OFFSET
+	lgdt pa(boot_gdt_descr)
 	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
@@ -105,8 +105,8 @@ ENTRY(startup_32)
  */
 	cld
 	xorl %eax,%eax
-	movl $__bss_start - __PAGE_OFFSET,%edi
-	movl $__bss_stop - __PAGE_OFFSET,%ecx
+	movl $pa(__bss_start),%edi
+	movl $pa(__bss_stop),%ecx
 	subl %edi,%ecx
 	shrl $2,%ecx
 	rep ; stosl
@@ -118,31 +118,32 @@ ENTRY(startup_32)
  * (kexec on panic case). Hence copy out the parameters before initializing
  * page tables.
  */
-	movl $(boot_params - __PAGE_OFFSET),%edi
+	movl $pa(boot_params),%edi
 	movl $(PARAM_SIZE/4),%ecx
 	cld
 	rep
 	movsl
-	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+	movl pa(boot_params) + NEW_CL_POINTER,%esi
 	andl %esi,%esi
 	jz 1f			# No comand line
-	movl $(boot_command_line - __PAGE_OFFSET),%edi
+	movl $pa(boot_command_line),%edi
 	movl $(COMMAND_LINE_SIZE/4),%ecx
 	rep
 	movsl
 1:
 
 #ifdef CONFIG_PARAVIRT
-	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+	/* This is can only trip for a broken bootloader... */
+	cmpw $0x207, pa(boot_params + BP_version)
 	jb default_entry
 
 	/* Paravirt-compatible boot parameters.  Look to see what architecture
 	   we're booting under. */
-	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+	movl pa(boot_params + BP_hardware_subarch), %eax
 	cmpl $num_subarch_entries, %eax
 	jae bad_subarch
 
-	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+	movl pa(subarch_entries)(,%eax,4), %eax
 	subl $__PAGE_OFFSET, %eax
 	jmp *%eax
 
@@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4
  * Mappings are created both at virtual address 0 (identity mapping)
  * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
  *
- * Warning: don't use %esi or the stack in this code.  However, %esp
- * can be used as a GPR if you really need it...
+ * Note that the stack is not yet set up!
  */
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
+#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */
 
 default_entry:
-	movl $(pg0 - __PAGE_OFFSET), %edi
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $0x007, %eax	/* 0x007 = PRESENT+RW+USER */
+#ifdef CONFIG_X86_PAE
+
+	/*
+	 * In PAE mode swapper_pg_dir is statically defined to contain enough
+	 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+	 * entries). The identity mapping is handled by pointing two PGD
+	 * entries to the first kernel PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at
+	 * this stage.
+	 */
+
+#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30)	/* Number of kernel PMDs */
+
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_pmd), %edx
+	movl $PTE_ATTR, %eax
+10:
+	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	loop 11b
+
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables.
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+	cmpl %ebp,%eax
+	jb 10b
+1:
+	movl %edi,pa(init_pg_tables_end)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_dir), %edx
+	movl $PTE_ATTR, %eax
 10:
-	leal 0x007(%edi),%ecx	/* Create PDE entry */
+	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
 	movl %ecx,(%edx)	/* Store identity PDE entry */
 	movl %ecx,page_pde_offset(%edx)	/* Store kernel PDE entry */
 	addl $4,%edx
@@ -189,19 +241,20 @@ default_entry:
 	stosl
 	addl $0x1000,%eax
 	loop 11b
-	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables; the +0x007 is
+	 * the attribute bits
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
-	/* Do an early initialization of the fixmap area */
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
-	addl $0x67, %eax		/* 0x67 == _PAGE_TABLE */
-	movl %eax, 4092(%edx)
+	movl %edi,pa(init_pg_tables_end)
 
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_dir+0xffc)
+#endif
 	jmp 3f
 /*
  * Non-boot CPU entry point; entered from trampoline.S
@@ -241,7 +294,7 @@ ENTRY(startup_32_smp)
 	 * NOTE! We have to correct for the fact that we're
 	 * not yet offset PAGE_OFFSET..
 	 */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+#define cr4_bits pa(mmu_cr4_features)
 	movl cr4_bits,%edx
 	andl %edx,%edx
 	jz 6f
@@ -276,10 +329,10 @@ ENTRY(startup_32_smp)
 /*
  * Enable paging
  */
-	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+	movl $pa(swapper_pg_dir),%eax
 	movl %eax,%cr3		/* set the page table pointer.. */
 	movl %cr0,%eax
-	orl $0x80000000,%eax
+	orl $X86_CR0_PG,%eax
 	movl %eax,%cr0		/* ..and set paging (PG) bit */
 	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
 1:
@@ -552,16 +605,44 @@ ENTRY(_stext)
  */
 .section ".bss.page_aligned","wa"
 	.align PAGE_SIZE_asm
+#ifdef CONFIG_X86_PAE
+ENTRY(swapper_pg_pmd)
+	.fill 1024*KPMDS,4,0
+#else
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
-ENTRY(swapper_pg_pmd)
+#endif
+ENTRY(swapper_pg_fixmap)
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
-
 /*
  * This starts the data section.
  */
+#ifdef CONFIG_X86_PAE
+.section ".data.page_aligned","wa"
+	/* Page-aligned for the benefit of paravirt? */
+	.align PAGE_SIZE_asm
+ENTRY(swapper_pg_dir)
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0		/* low identity map */
+# if KPMDS == 3
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0
+	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+	.long	pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
+# elif KPMDS == 2
+	.long	0,0
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0
+	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+# elif KPMDS == 1
+	.long	0,0
+	.long	0,0
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0
+# else
+#  error "Kernel PMDs should be 1, 2 or 3"
+# endif
+	.align PAGE_SIZE_asm		/* needs to be page-sized too */
+#endif
+
 .data
 ENTRY(stack_start)
 	.long init_thread_union+THREAD_SIZE
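
The KPMDS constant above is just the number of gigabytes in the kernel half of the address space, since each PAE PMD page maps 1GiB. A hedged sketch evaluating it for the usual VMSPLIT settings (the offsets are the standard 32-bit splits, reproduced here purely for illustration):

#include <stdio.h>

int main(void)
{
	/* common CONFIG_VMSPLIT_* values of __PAGE_OFFSET on 32-bit x86 */
	unsigned long long offsets[] = { 0xC0000000ULL, 0x80000000ULL, 0x40000000ULL };

	for (int i = 0; i < 3; i++) {
		/* KPMDS = ((0x100000000 - __PAGE_OFFSET) >> 30) */
		unsigned long long kpmds = (0x100000000ULL - offsets[i]) >> 30;
		printf("__PAGE_OFFSET=%#llx -> KPMDS=%llu\n", offsets[i], kpmds);
	}
	return 0;	/* prints 1, 2 and 3 - the cases swapper_pg_dir handles */
}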
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 219f86eb6123..027fc067b399 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -12,48 +12,37 @@
  */
 
 /*
- * We are using the 32Khz input clock - its the only one that has the
+ * We are using the 32.768kHz input clock - it's the only one that has the
  * ranges we find desirable.  The following table lists the suitable
- * divisors and the associated hz, minimum interval
- * and the maximum interval:
+ * divisors and the associated Hz, minimum interval and the maximum interval:
  *
- *  Divisor   Hz      Min Delta (S) Max Delta (S)
- *  1        32000     .0005          2.048
- *  2        16000     .001           4.096
- *  4         8000     .002           8.192
- *  8         4000     .004          16.384
- *  16        2000     .008          32.768
- *  32        1000     .016          65.536
- *  64         500     .032         131.072
- *  128        250     .064         262.144
- *  256        125     .128         524.288
+ *  Divisor   Hz      Min Delta (s) Max Delta (s)
+ *  1        32768    .00048828125      2.000
+ *  2        16384    .0009765625       4.000
+ *  4         8192    .001953125        8.000
+ *  8         4096    .00390625        16.000
+ *  16        2048    .0078125         32.000
+ *  32        1024    .015625          64.000
+ *  64         512    .03125          128.000
+ *  128        256    .0625           256.000
+ *  256        128    .125            512.000
  */
 
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
 #include <asm/geode.h>
 
-#define F_AVAIL    0x01
-
 static struct mfgpt_timer_t {
-	int flags;
-	struct module *owner;
+	unsigned int avail:1;
 } mfgpt_timers[MFGPT_MAX_TIMERS];
 
 /* Selected from the table above */
 
 #define MFGPT_DIVISOR 16
 #define MFGPT_SCALE  4     /* divisor = 2^(scale) */
-#define MFGPT_HZ  (32000 / MFGPT_DIVISOR)
+#define MFGPT_HZ  (32768 / MFGPT_DIVISOR)
 #define MFGPT_PERIODIC (MFGPT_HZ / HZ)
 
-#ifdef CONFIG_GEODE_MFGPT_TIMER
-static int __init mfgpt_timer_setup(void);
-#else
-#define mfgpt_timer_setup() (0)
-#endif
-
 /* Allow for disabling of MFGPTs */
 static int disable;
 static int __init mfgpt_disable(char *s)
@@ -85,28 +74,37 @@ __setup("mfgptfix", mfgpt_fix);
  * In other cases (such as with VSAless OpenFirmware), the system firmware
  * leaves timers available for us to use.
  */
-int __init geode_mfgpt_detect(void)
+
+
+static int timers = -1;
+
+static void geode_mfgpt_detect(void)
 {
-	int count = 0, i;
+	int i;
 	u16 val;
 
+	timers = 0;
+
 	if (disable) {
-		printk(KERN_INFO "geode-mfgpt: Skipping MFGPT setup\n");
-		return 0;
+		printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
+		goto done;
+	}
+
+	if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
+		printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
+		goto done;
 	}
 
 	for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
 		val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
 		if (!(val & MFGPT_SETUP_SETUP)) {
-			mfgpt_timers[i].flags = F_AVAIL;
-			count++;
+			mfgpt_timers[i].avail = 1;
+			timers++;
 		}
 	}
 
-	/* set up clock event device, if desired */
-	i = mfgpt_timer_setup();
-
-	return count;
+done:
+	printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
 }
 
 int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
@@ -183,36 +181,41 @@ int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
 	return 0;
 }
 
-static int mfgpt_get(int timer, struct module *owner)
+static int mfgpt_get(int timer)
 {
-	mfgpt_timers[timer].flags &= ~F_AVAIL;
-	mfgpt_timers[timer].owner = owner;
+	mfgpt_timers[timer].avail = 0;
 	printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
 	return timer;
 }
 
-int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner)
+int geode_mfgpt_alloc_timer(int timer, int domain)
 {
 	int i;
 
-	if (!geode_get_dev_base(GEODE_DEV_MFGPT))
-		return -ENODEV;
+	if (timers == -1) {
+		/* timers haven't been detected yet */
+		geode_mfgpt_detect();
+	}
+
+	if (!timers)
+		return -1;
+
 	if (timer >= MFGPT_MAX_TIMERS)
-		return -EIO;
+		return -1;
 
 	if (timer < 0) {
 		/* Try to find an available timer */
 		for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
-			if (mfgpt_timers[i].flags & F_AVAIL)
-				return mfgpt_get(i, owner);
+			if (mfgpt_timers[i].avail)
+				return mfgpt_get(i);
 
 			if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
 				break;
 		}
 	} else {
 		/* If they requested a specific timer, try to honor that */
-		if (mfgpt_timers[timer].flags & F_AVAIL)
-			return mfgpt_get(timer, owner);
+		if (mfgpt_timers[timer].avail)
+			return mfgpt_get(timer);
 	}
 
 	/* No timers available - too bad */
@@ -244,10 +247,11 @@ static int __init mfgpt_setup(char *str)
 }
 __setup("mfgpt_irq=", mfgpt_setup);
 
-static inline void mfgpt_disable_timer(u16 clock)
+static void mfgpt_disable_timer(u16 clock)
 {
-	u16 val = geode_mfgpt_read(clock, MFGPT_REG_SETUP);
-	geode_mfgpt_write(clock, MFGPT_REG_SETUP, val & ~MFGPT_SETUP_CNTEN);
+	/* avoid races by clearing CMP1 and CMP2 unconditionally */
+	geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
+			MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
 }
 
 static int mfgpt_next_event(unsigned long, struct clock_event_device *);
@@ -263,7 +267,7 @@ static struct clock_event_device mfgpt_clockevent = {
 	.shift = 32
 };
 
-static inline void mfgpt_start_timer(u16 clock, u16 delta)
+static void mfgpt_start_timer(u16 delta)
 {
 	geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
 	geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
@@ -278,21 +282,25 @@ static void mfgpt_set_mode(enum clock_event_mode mode,
 		mfgpt_disable_timer(mfgpt_event_clock);
 
 	if (mode == CLOCK_EVT_MODE_PERIODIC)
-		mfgpt_start_timer(mfgpt_event_clock, MFGPT_PERIODIC);
+		mfgpt_start_timer(MFGPT_PERIODIC);
 
 	mfgpt_tick_mode = mode;
 }
 
 static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
 {
-	mfgpt_start_timer(mfgpt_event_clock, delta);
+	mfgpt_start_timer(delta);
 	return 0;
 }
 
-/* Assume (foolishly?), that this interrupt was due to our tick */
-
 static irqreturn_t mfgpt_tick(int irq, void *dev_id)
 {
+	u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
+
+	/* See if the interrupt was for us */
+	if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
+		return IRQ_NONE;
+
 	/* Turn off the clock (and clear the event) */
 	mfgpt_disable_timer(mfgpt_event_clock);
 
@@ -320,13 +328,12 @@ static struct irqaction mfgptirq = {
 	.name = "mfgpt-timer"
 };
 
-static int __init mfgpt_timer_setup(void)
+int __init mfgpt_timer_setup(void)
 {
 	int timer, ret;
 	u16 val;
 
-	timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING,
-			THIS_MODULE);
+	timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
 	if (timer < 0) {
 		printk(KERN_ERR
 			"mfgpt-timer: Could not allocate a MFPGT timer\n");
@@ -363,7 +370,7 @@ static int __init mfgpt_timer_setup(void)
 			&mfgpt_clockevent);
 
 	printk(KERN_INFO
-		"mfgpt-timer: registering the MFGT timer as a clock event.\n");
+		"mfgpt-timer: registering the MFGPT timer as a clock event.\n");
 	clockevents_register_device(&mfgpt_clockevent);
 
 	return 0;
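
The corrected comment table follows directly from the 32.768kHz input clock: the tick rate is 32768/divisor, so with MFGPT_DIVISOR set to 16 the MFGPT_HZ macro comes out to 2048. A sketch reproducing the table, assuming (read off the table's own values, not from a datasheet) a 16-tick minimum delta and the full 65536-tick range of the 16-bit counter as the maximum:

#include <stdio.h>

int main(void)
{
	const double input_hz = 32768.0;	/* 32.768kHz input clock */

	printf("Divisor       Hz  Min Delta (s)  Max Delta (s)\n");
	for (int divisor = 1; divisor <= 256; divisor <<= 1) {
		double hz = input_hz / divisor;
		printf("%7d %8.0f %14.11f %14.3f\n",
		       divisor, hz, 16.0 / hz, 65536.0 / hz);
	}
	return 0;
}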
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index d1d8c347cc0b..691ab4cb167b 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 EXPORT_SYMBOL(boot_cpu_data);
 
+#ifndef CONFIG_X86_PAE
 unsigned long mmu_cr4_features;
+#else
+unsigned long mmu_cr4_features = X86_CR4_PAE;
+#endif
 
 /* for MCA, but anyone else can use it if they want */
 unsigned int machine_id;
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index e6757aaa202b..a40051b71d9b 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(arch_register_cpu);
 
 void arch_unregister_cpu(int num)
 {
-	return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
+	unregister_cpu(&per_cpu(cpu_devices, num).cpu);
 }
 EXPORT_SYMBOL(arch_unregister_cpu);
 #else
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d1bc04006d16..8106bba41ecb 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -46,6 +46,7 @@
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
+#include <asm/setup.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
 
 void __init native_pagetable_setup_start(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	int i;
+	unsigned long pfn, va;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
 	/*
-	 * Init entries of the first-level page table to the
-	 * zero page, if they haven't already been set up.
-	 *
-	 * In a normal native boot, we'll be running on a
-	 * pagetable rooted in swapper_pg_dir, but not in PAE
-	 * mode, so this will end up clobbering the mappings
-	 * for the lower 24Mbytes of the address space,
-	 * without affecting the kernel address space.
+	 * Remove any mappings which extend past the end of physical
+	 * memory from the boot time page table:
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-		set_pgd(&base[i],
-			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
-	/* Make sure kernel address space is empty so that a pagetable
-	   will be allocated for it. */
-	memset(&base[USER_PTRS_PER_PGD], 0,
-	       KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
+	for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
+		va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
+		pgd = base + pgd_index(va);
+		if (!pgd_present(*pgd))
+			break;
+
+		pud = pud_offset(pgd, va);
+		pmd = pmd_offset(pud, va);
+		if (!pmd_present(*pmd))
+			break;
+
+		pte = pte_offset_kernel(pmd, va);
+		if (!pte_present(*pte))
+			break;
+
+		pte_clear(NULL, va, pte);
+	}
 	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
-#endif
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Add low memory identity-mappings - SMP needs it when
-	 * starting up on an AP from real-mode. In the non-PAE
-	 * case we already have these mappings through head.S.
-	 * All user-space mappings are explicitly cleared after
-	 * SMP startup.
-	 */
-	set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
 }
 
 /*
@@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
  * the boot process.
  *
  * If we're booting on native hardware, this will be a pagetable
- * constructed in arch/i386/kernel/head.S, and not running in PAE mode
- * (even if we'll end up running in PAE).  The root of the pagetable
- * will be swapper_pg_dir.
+ * constructed in arch/x86/kernel/head_32.S.  The root of the
+ * pagetable will be swapper_pg_dir.
  *
  * If we're booting paravirtualized under a hypervisor, then there are
  * more options: we may already be running PAE, and the pagetable may
@@ -537,14 +531,6 @@ void __init paging_init(void)
 
 	load_cr3(swapper_pg_dir);
 
-#ifdef CONFIG_X86_PAE
-	/*
-	 * We will bail out later - printk doesn't work right now so
-	 * the user would just see a hanging kernel.
-	 */
-	if (cpu_has_pae)
-		set_in_cr4(X86_CR4_PAE);
-#endif
 	__flush_tlb_all();
 
 	kmap_init();
@@ -675,13 +661,11 @@ void __init mem_init(void)
 	BUG_ON((unsigned long)high_memory	> VMALLOC_START);
 #endif /* double-sanity-check paranoia */
 
-#ifdef CONFIG_X86_PAE
-	if (!cpu_has_pae)
-		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
+	cpa_init();
+
 	/*
 	 * Subtle. SMP is doing it's boot stuff late (because it has to
 	 * fork idle threads) - but it also needs low mappings for the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5fe880fc305d..b59fc238151f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -528,13 +528,15 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
+
+	cpa_init();
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
-	unsigned long addr;
+	unsigned long addr = begin;
 
-	if (begin >= end)
+	if (addr >= end)
 		return;
 
 	/*
@@ -549,7 +551,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 #else
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 
-	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+	for (; addr < end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
 		memset((void *)(addr & ~(PAGE_SIZE-1)),
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ee6648fe6b15..a4897a85268a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str)
 early_param("early_ioremap_debug", early_ioremap_debug_setup);
 
 static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
+static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
 				__attribute__((aligned(PAGE_SIZE)));
 
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
-	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+	pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+
+	return pmd;
 }
 
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
 {
-	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_ioremap_init(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk(KERN_INFO "early_ioremap_init()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = __pa(bm_pte) | _PAGE_TABLE;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	memset(bm_pte, 0, sizeof(bm_pte));
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
 	/*
-	 * The boot-ioremap range spans multiple pgds, for which
+	 * The boot-ioremap range spans multiple pmds, for which
 	 * we are not prepared:
 	 */
-	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
-		printk(KERN_WARNING "pgd %p != %p\n",
-		       pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+		printk(KERN_WARNING "pmd %p != %p\n",
+		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
 			fix_to_virt(FIX_BTMAP_BEGIN));
 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
 			fix_to_virt(FIX_BTMAP_END));
 
 		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
 		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
@@ -304,28 +309,29 @@ void __init early_ioremap_init(void)
 
 void __init early_ioremap_clear(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk(KERN_INFO "early_ioremap_clear()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = 0;
-	paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+	pmd_clear(pmd);
+	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
 	__flush_tlb_all();
 }
 
 void __init early_ioremap_reset(void)
 {
 	enum fixed_addresses idx;
-	unsigned long *pte, phys, addr;
+	unsigned long addr, phys;
+	pte_t *pte;
 
 	after_paging_init = 1;
 	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
 		addr = fix_to_virt(idx);
 		pte = early_ioremap_pte(addr);
-		if (*pte & _PAGE_PRESENT) {
-			phys = *pte & PAGE_MASK;
+		if (pte_present(*pte)) {
+			phys = pte_val(*pte) & PAGE_MASK;
 			set_fixmap(idx, phys);
 		}
 	}
@@ -334,7 +340,8 @@ void __init early_ioremap_reset(void)
 static void __init __early_set_fixmap(enum fixed_addresses idx,
 				   unsigned long phys, pgprot_t flags)
 {
-	unsigned long *pte, addr = __fix_to_virt(idx);
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
 
 	if (idx >= __end_of_fixed_addresses) {
 		BUG();
@@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
 	}
 	pte = early_ioremap_pte(addr);
 	if (pgprot_val(flags))
-		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
-		*pte = 0;
+		pte_clear(NULL, addr, pte);
 	__flush_tlb_one(addr);
 }
 
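
The typed helpers above replace open-coded shifts; for the 32-bit non-PAE geometry, pte_index() reduces to exactly the expression the patch removes. A small sketch of that equivalence (PAGE_SHIFT and PTRS_PER_PTE hard-coded here purely for illustration):

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PTRS_PER_PTE	1024	/* 32-bit non-PAE: 1024 4-byte PTEs per page table */

/* what pte_index() expands to for this geometry */
static unsigned long pte_index(unsigned long addr)
{
	return (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

int main(void)
{
	unsigned long addr = 0xfffe5000UL;	/* a fixmap-like address */

	/* identical to the open-coded form the patch removes */
	assert(pte_index(addr) == ((addr >> PAGE_SHIFT) & 1023));
	printf("pte_index(%#lx) = %lu\n", addr, pte_index(addr));
	return 0;
}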
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8493c855582b..440210a2277d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/interrupt.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -191,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
  * or when the present bit is not set. Otherwise we would return a
  * pointer to a nonexisting mapping.
  */
-pte_t *lookup_address(unsigned long address, int *level)
+pte_t *lookup_address(unsigned long address, unsigned int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
@@ -252,10 +253,11 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, flags;
+	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot;
-	int level, do_split = 1;
+	int i, do_split = 1;
+	unsigned int level;
 
 	spin_lock_irqsave(&pgd_lock, flags);
 	/*
@@ -302,6 +304,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	new_prot = static_protections(new_prot, address);
 
 	/*
+	 * We need to check the full range, whether
+	 * static_protection() requires a different pgprot for one of
+	 * the pages in the range we try to preserve:
+	 */
+	addr = address + PAGE_SIZE;
+	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
+		pgprot_t chk_prot = static_protections(new_prot, addr);
+
+		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+			goto out_unlock;
+	}
+
+	/*
 	 * If there are no changes, return. maxpages has been updated
 	 * above:
 	 */
@@ -335,23 +350,103 @@ out_unlock:
 	return do_split;
 }
 
+static LIST_HEAD(page_pool);
+static unsigned long pool_size, pool_pages, pool_low;
+static unsigned long pool_used, pool_failed, pool_refill;
+
+static void cpa_fill_pool(void)
+{
+	struct page *p;
+	gfp_t gfp = GFP_KERNEL;
+
+	/* Do not allocate from interrupt context */
+	if (in_irq() || irqs_disabled())
+		return;
+	/*
+	 * Check unlocked. I does not matter when we have one more
+	 * page in the pool. The bit lock avoids recursive pool
+	 * allocations:
+	 */
+	if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+		return;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * We could do:
+	 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
+	 * but this fails on !PREEMPT kernels
+	 */
+	gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+#endif
+
+	while (pool_pages < pool_size) {
+		p = alloc_pages(gfp, 0);
+		if (!p) {
+			pool_failed++;
+			break;
+		}
+		spin_lock_irq(&pgd_lock);
+		list_add(&p->lru, &page_pool);
+		pool_pages++;
+		spin_unlock_irq(&pgd_lock);
+	}
+	clear_bit_unlock(0, &pool_refill);
+}
+
+#define SHIFT_MB		(20 - PAGE_SHIFT)
+#define ROUND_MB_GB		((1 << 10) - 1)
+#define SHIFT_MB_GB		10
+#define POOL_PAGES_PER_GB	16
+
+void __init cpa_init(void)
+{
+	struct sysinfo si;
+	unsigned long gb;
+
+	si_meminfo(&si);
+	/*
+	 * Calculate the number of pool pages:
+	 *
+	 * Convert totalram (nr of pages) to MiB and round to the next
+	 * GiB. Shift MiB to Gib and multiply the result by
+	 * POOL_PAGES_PER_GB:
+	 */
+	gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+	pool_size = POOL_PAGES_PER_GB * gb;
+	pool_low = pool_size;
+
+	cpa_fill_pool();
+	printk(KERN_DEBUG
+	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
+	       pool_pages, pool_size);
+}
+
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	unsigned long flags, pfn, pfninc = 1;
-	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
 	pgprot_t ref_prot;
 	struct page *base;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
-#endif
-	base = alloc_pages(gfp_flags, 0);
-	if (!base)
+	/*
+	 * Get a page from the pool. The pool list is protected by the
+	 * pgd_lock, which we have to take anyway for the split
+	 * operation:
+	 */
+	spin_lock_irqsave(&pgd_lock, flags);
+	if (list_empty(&page_pool)) {
+		spin_unlock_irqrestore(&pgd_lock, flags);
 		return -ENOMEM;
+	}
+
+	base = list_first_entry(&page_pool, struct page, lru);
+	list_del(&base->lru);
+	pool_pages--;
+
+	if (pool_pages < pool_low)
+		pool_low = pool_pages;
 
-	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -396,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	base = NULL;
 
 out_unlock:
+	/*
+	 * If we dropped out via the lookup_address check under
+	 * pgd_lock then stick the page back into the pool:
+	 */
+	if (base) {
+		list_add(&base->lru, &page_pool);
+		pool_pages++;
+	} else
+		pool_used++;
 	spin_unlock_irqrestore(&pgd_lock, flags);
 
-	if (base)
-		__free_pages(base, 0);
-
 	return 0;
 }
 
 static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
-	int level, do_split, err;
+	int do_split, err;
+	unsigned int level;
 	struct page *kpte_page;
 	pte_t *kpte;
 
@@ -598,7 +700,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	 * Check whether we really changed something:
 	 */
 	if (!cpa.flushtlb)
-		return ret;
+		goto out;
 
 	/*
 	 * No need to flush, when we did not set any of the caching
@@ -617,6 +719,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	else
 		cpa_flush_all(cache);
 
+out:
+	cpa_fill_pool();
 	return ret;
 }
 
@@ -770,6 +874,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	 * but that can deadlock->flush only current cpu:
 	 */
 	__flush_tlb_all();
+
+	/*
+	 * Try to refill the page pool here. We can do this only after
+	 * the tlb flush.
+	 */
+	cpa_fill_pool();
 }
 #endif
 
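
cpa_init() above sizes the pool at POOL_PAGES_PER_GB (16) pages per GiB of RAM, rounding the machine's MiB count up to the next GiB. A standalone check of the shift arithmetic, assuming 4KiB pages (PAGE_SHIFT of 12):

#include <stdio.h>

#define PAGE_SHIFT		12
#define SHIFT_MB		(20 - PAGE_SHIFT)	/* pages -> MiB */
#define ROUND_MB_GB		((1 << 10) - 1)		/* round MiB up to a GiB */
#define SHIFT_MB_GB		10			/* MiB -> GiB */
#define POOL_PAGES_PER_GB	16

int main(void)
{
	unsigned long sizes[] = { 131072UL /* 512MiB */, 524288UL /* 2GiB */ };

	for (int i = 0; i < 2; i++) {
		unsigned long gb = ((sizes[i] >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
		printf("%lu pages RAM -> pool of %lu pages\n",
		       sizes[i], POOL_PAGES_PER_GB * gb);	/* 16 and 32 */
	}
	return 0;
}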
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index d764ec950065..9ff4d5b55ad1 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_PM)		+= cpu.o
-obj-$(CONFIG_HIBERNATION)	+= swsusp.o suspend.o
+obj-$(CONFIG_PM_SLEEP)		+= cpu_$(BITS).o
+obj-$(CONFIG_HIBERNATION)	+= hibernate_$(BITS).o hibernate_asm_$(BITS).o
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu_32.c
index efcf620d1439..7f9c6da04a4c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu_32.c
@@ -40,7 +40,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	savesegment(ss, ctxt->ss);
 
 	/*
-	 * control registers 
+	 * control registers
 	 */
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/power/cpu_64.c
index 7ac7130022f1..66bdfb591fd8 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -1,8 +1,9 @@
 /*
- * Suspend support specific for i386.
+ * Suspend and hibernation support for x86-64
  *
  * Distribute under GPLv2
  *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
  * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
@@ -14,9 +15,6 @@
 #include <asm/pgtable.h>
 #include <asm/mtrr.h>
 
-/* References to section boundaries */
-extern const void __nosave_begin, __nosave_end;
-
 static void fix_processor_context(void);
 
 struct saved_context saved_context;
@@ -63,7 +61,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	mtrr_save_fixed_ranges(NULL);
 
 	/*
-	 * control registers 
+	 * control registers
 	 */
 	rdmsrl(MSR_EFER, ctxt->efer);
 	ctxt->cr0 = read_cr0();
@@ -166,155 +164,3 @@ static void fix_processor_context(void)
 		loaddebug(&current->thread, 7);
 	}
 }
-
-#ifdef CONFIG_HIBERNATION
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-/*
- * Address to jump to in the last phase of restore in order to get to the image
- * kernel's text (this value is passed in the image header).
- */
-unsigned long restore_jump_address;
-
-/*
- * Value of the cr3 register from before the hibernation (this value is passed
- * in the image header).
- */
-unsigned long restore_cr3;
-
-pgd_t *temp_level4_pgt;
-
-void *relocated_restore_code;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
-{
-	long i, j;
-
-	i = pud_index(address);
-	pud = pud + i;
-	for (; i < PTRS_PER_PUD; pud++, i++) {
-		unsigned long paddr;
-		pmd_t *pmd;
-
-		paddr = address + i*PUD_SIZE;
-		if (paddr >= end)
-			break;
-
-		pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-		if (!pmd)
-			return -ENOMEM;
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
-			unsigned long pe;
-
-			if (paddr >= end)
-				break;
-			pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
-			pe &= __supported_pte_mask;
-			set_pmd(pmd, __pmd(pe));
-		}
-	}
-	return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
-	unsigned long start, end, next;
-	int error;
-
-	temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
-	if (!temp_level4_pgt)
-		return -ENOMEM;
-
-	/* It is safe to reuse the original kernel mapping */
-	set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
-		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
-	/* Set up the direct mapping from scratch */
-	start = (unsigned long)pfn_to_kaddr(0);
-	end = (unsigned long)pfn_to_kaddr(end_pfn);
-
-	for (; start < end; start = next) {
-		pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-		if (!pud)
-			return -ENOMEM;
-		next = start + PGDIR_SIZE;
-		if (next > end)
-			next = end;
-		if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
-			return error;
-		set_pgd(temp_level4_pgt + pgd_index(start),
-			mk_kernel_pgd(__pa(pud)));
-	}
-	return 0;
-}
-
-int swsusp_arch_resume(void)
-{
-	int error;
-
-	/* We have got enough memory and from now on we cannot recover */
-	if ((error = set_up_temporary_mappings()))
-		return error;
-
-	relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
-	if (!relocated_restore_code)
-		return -ENOMEM;
-	memcpy(relocated_restore_code, &core_restore_code,
-	       &restore_registers - &core_restore_code);
-
-	restore_image();
-	return 0;
-}
-
-/*
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
-	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-struct restore_data_record {
-	unsigned long jump_address;
-	unsigned long cr3;
-	unsigned long magic;
-};
-
-#define RESTORE_MAGIC	0x0123456789ABCDEFUL
-
-/**
- *	arch_hibernation_header_save - populate the architecture specific part
- *		of a hibernation image header
- *	@addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
-	struct restore_data_record *rdr = addr;
-
-	if (max_size < sizeof(struct restore_data_record))
-		return -EOVERFLOW;
-	rdr->jump_address = restore_jump_address;
-	rdr->cr3 = restore_cr3;
-	rdr->magic = RESTORE_MAGIC;
-	return 0;
-}
-
-/**
- *	arch_hibernation_header_restore - read the architecture specific data
- *		from the hibernation image header
- *	@addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
-	struct restore_data_record *rdr = addr;
-
-	restore_jump_address = rdr->jump_address;
-	restore_cr3 = rdr->cr3;
-	return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
-}
-#endif /* CONFIG_HIBERNATION */
diff --git a/arch/x86/power/suspend.c b/arch/x86/power/hibernate_32.c
index a0020b913f31..f2b6e3f11bfc 100644
--- a/arch/x86/power/suspend.c
+++ b/arch/x86/power/hibernate_32.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Suspend support specific for i386 - temporary page tables 2 * Hibernation support specific for i386 - temporary page tables
3 * 3 *
4 * Distribute under GPLv2 4 * Distribute under GPLv2
5 * 5 *
@@ -13,7 +13,7 @@
13#include <asm/page.h> 13#include <asm/page.h>
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15 15
16/* Defined in arch/i386/power/swsusp.S */ 16/* Defined in hibernate_asm_32.S */
17extern int restore_image(void); 17extern int restore_image(void);
18 18
19/* References to section boundaries */ 19/* References to section boundaries */
@@ -23,7 +23,7 @@ extern const void __nosave_begin, __nosave_end;
23pgd_t *resume_pg_dir; 23pgd_t *resume_pg_dir;
24 24
25/* The following three functions are based on the analogous code in 25/* The following three functions are based on the analogous code in
26 * arch/i386/mm/init.c 26 * arch/x86/mm/init_32.c
27 */ 27 */
28 28
29/* 29/*
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
new file mode 100644
index 000000000000..b542355e0e34
--- /dev/null
+++ b/arch/x86/power/hibernate_64.c
@@ -0,0 +1,169 @@
1/*
2 * Hibernation support for x86-64
3 *
4 * Distribute under GPLv2
5 *
6 * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
7 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
8 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
9 */
10
11#include <linux/smp.h>
12#include <linux/suspend.h>
13#include <asm/proto.h>
14#include <asm/page.h>
15#include <asm/pgtable.h>
16#include <asm/mtrr.h>
17
18/* References to section boundaries */
19extern const void __nosave_begin, __nosave_end;
20
21/* Defined in hibernate_asm_64.S */
22extern int restore_image(void);
23
24/*
25 * Address to jump to in the last phase of restore in order to get to the image
26 * kernel's text (this value is passed in the image header).
27 */
28unsigned long restore_jump_address;
29
30/*
31 * Value of the cr3 register from before the hibernation (this value is passed
32 * in the image header).
33 */
34unsigned long restore_cr3;
35
36pgd_t *temp_level4_pgt;
37
38void *relocated_restore_code;
39
40static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
41{
42 long i, j;
43
44 i = pud_index(address);
45 pud = pud + i;
46 for (; i < PTRS_PER_PUD; pud++, i++) {
47 unsigned long paddr;
48 pmd_t *pmd;
49
50 paddr = address + i*PUD_SIZE;
51 if (paddr >= end)
52 break;
53
54 pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
55 if (!pmd)
56 return -ENOMEM;
57 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
58 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
59 unsigned long pe;
60
61 if (paddr >= end)
62 break;
63 pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
64 pe &= __supported_pte_mask;
65 set_pmd(pmd, __pmd(pe));
66 }
67 }
68 return 0;
69}
70
71static int set_up_temporary_mappings(void)
72{
73 unsigned long start, end, next;
74 int error;
75
76 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
77 if (!temp_level4_pgt)
78 return -ENOMEM;
79
80 /* It is safe to reuse the original kernel mapping */
81 set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
82 init_level4_pgt[pgd_index(__START_KERNEL_map)]);
83
84 /* Set up the direct mapping from scratch */
85 start = (unsigned long)pfn_to_kaddr(0);
86 end = (unsigned long)pfn_to_kaddr(end_pfn);
87
88 for (; start < end; start = next) {
89 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
90 if (!pud)
91 return -ENOMEM;
92 next = start + PGDIR_SIZE;
93 if (next > end)
94 next = end;
95 if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
96 return error;
97 set_pgd(temp_level4_pgt + pgd_index(start),
98 mk_kernel_pgd(__pa(pud)));
99 }
100 return 0;
101}
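res_phys_pud_init() and set_up_temporary_mappings() above rebuild the direct mapping with 2 MiB pages, one freshly allocated pud page per pgd slot. The index arithmetic they depend on can be checked in isolation; the sketch below assumes the classic 4-level x86-64 layout with 4 KiB pages, and its names are local stand-ins rather than the kernel's macros.

#include <stdio.h>

/* Standalone check of the table-walk arithmetic used above. */
#define PUD_SHIFT       30      /* one pud entry covers 1 GiB */
#define PGDIR_SHIFT     39      /* one pgd entry covers 512 GiB */
#define PTRS_PER_TABLE  512

static unsigned long long pud_index(unsigned long long addr)
{
        return (addr >> PUD_SHIFT) & (PTRS_PER_TABLE - 1);
}

static unsigned long long pgd_index(unsigned long long addr)
{
        return (addr >> PGDIR_SHIFT) & (PTRS_PER_TABLE - 1);
}

int main(void)
{
        unsigned long long addr = 3ULL << 30;   /* address at 3 GiB */

        printf("addr %#llx: pgd slot %llu, pud slot %llu\n",
               addr, pgd_index(addr), pud_index(addr));
        return 0;
}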
102
103int swsusp_arch_resume(void)
104{
105 int error;
106
107 /* We have got enough memory and from now on we cannot recover */
108 if ((error = set_up_temporary_mappings()))
109 return error;
110
111 relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
112 if (!relocated_restore_code)
113 return -ENOMEM;
114 memcpy(relocated_restore_code, &core_restore_code,
115 &restore_registers - &core_restore_code);
116
117 restore_image();
118 return 0;
119}
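swsusp_arch_resume() copies the span between the core_restore_code and restore_registers asm labels onto a freshly allocated safe page, so the final copy loop cannot be clobbered by the image it is restoring. A toy model of that span copy, with byte arrays standing in for the two asm symbols:

#include <stdio.h>
#include <string.h>

/* Toy model: relocate the span [begin, end) to memory known not to
 * collide with the data being restored.  The labels here are fake; in
 * the kernel they are core_restore_code and restore_registers. */
static unsigned char text[32] = "restore loop body";
static unsigned char safe_page[32];

int main(void)
{
        unsigned char *begin = text;
        unsigned char *end = text + sizeof("restore loop body");

        memcpy(safe_page, begin, (size_t)(end - begin));
        printf("relocated %td bytes: \"%s\"\n", end - begin, safe_page);
        return 0;
}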
120
121/*
122 * pfn_is_nosave - check if given pfn is in the 'nosave' section
123 */
124
125int pfn_is_nosave(unsigned long pfn)
126{
127 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
128 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
129 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
130}
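pfn_is_nosave() converts the linker-provided section bounds to page frame numbers, rounding the end up so a partially filled last page is still excluded from the image. The same arithmetic compiles standalone; the section addresses below are made-up stand-ins for __nosave_begin/__nosave_end.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Made-up physical section bounds; the kernel gets these from the
 * linker script symbols __nosave_begin/__nosave_end. */
static const unsigned long nosave_begin = 0x200000;
static const unsigned long nosave_end   = 0x203456;

static int pfn_is_nosave(unsigned long pfn)
{
        unsigned long begin_pfn = nosave_begin >> PAGE_SHIFT;
        unsigned long end_pfn = PAGE_ALIGN(nosave_end) >> PAGE_SHIFT;

        return pfn >= begin_pfn && pfn < end_pfn;
}

int main(void)
{
        printf("pfn 0x200: %d, pfn 0x203: %d, pfn 0x204: %d\n",
               pfn_is_nosave(0x200), pfn_is_nosave(0x203),
               pfn_is_nosave(0x204));  /* prints 1, 1, 0 */
        return 0;
}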
131
132struct restore_data_record {
133 unsigned long jump_address;
134 unsigned long cr3;
135 unsigned long magic;
136};
137
138#define RESTORE_MAGIC 0x0123456789ABCDEFUL
139
140/**
141 * arch_hibernation_header_save - populate the architecture specific part
142 * of a hibernation image header
143 * @addr: address to save the data at
144 */
145int arch_hibernation_header_save(void *addr, unsigned int max_size)
146{
147 struct restore_data_record *rdr = addr;
148
149 if (max_size < sizeof(struct restore_data_record))
150 return -EOVERFLOW;
151 rdr->jump_address = restore_jump_address;
152 rdr->cr3 = restore_cr3;
153 rdr->magic = RESTORE_MAGIC;
154 return 0;
155}
156
157/**
158 * arch_hibernation_header_restore - read the architecture specific data
159 * from the hibernation image header
160 * @addr: address to read the data from
161 */
162int arch_hibernation_header_restore(void *addr)
163{
164 struct restore_data_record *rdr = addr;
165
166 restore_jump_address = rdr->jump_address;
167 restore_cr3 = rdr->cr3;
168 return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
169}
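The restore_data_record round trip above is self-checking: the magic constant lets arch_hibernation_header_restore() reject an image written by a kernel with a different header layout. The same save/restore pair, reduced to a standalone program; the page array stands in for the header page the hibernation core passes to the arch hooks, and the jump/cr3 values are arbitrary.

#include <stdio.h>
#include <errno.h>

/* Mirrors the hunk above; RESTORE_MAGIC assumes a 64-bit unsigned long,
 * as on the x86-64 build this file targets. */
struct restore_data_record {
        unsigned long jump_address;
        unsigned long cr3;
        unsigned long magic;
};

#define RESTORE_MAGIC   0x0123456789ABCDEFUL

static unsigned long restore_jump_address = 0x1000000UL;        /* arbitrary */
static unsigned long restore_cr3 = 0x3c0000UL;                  /* arbitrary */

static int header_save(void *addr, unsigned int max_size)
{
        struct restore_data_record *rdr = addr;

        if (max_size < sizeof(*rdr))
                return -EOVERFLOW;
        rdr->jump_address = restore_jump_address;
        rdr->cr3 = restore_cr3;
        rdr->magic = RESTORE_MAGIC;
        return 0;
}

static int header_restore(void *addr)
{
        struct restore_data_record *rdr = addr;

        restore_jump_address = rdr->jump_address;
        restore_cr3 = rdr->cr3;
        return rdr->magic == RESTORE_MAGIC ? 0 : -EINVAL;
}

int main(void)
{
        static unsigned long page[512];         /* stands in for the header page */

        if (header_save(page, sizeof(page)) == 0 &&
            header_restore(page) == 0)
                printf("header ok: jump=%#lx cr3=%#lx\n",
                       restore_jump_address, restore_cr3);
        return 0;
}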
diff --git a/arch/x86/power/swsusp.S b/arch/x86/power/hibernate_asm_32.S
index 53662e05b393..b95aa6cfe3cb 100644
--- a/arch/x86/power/swsusp.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -1,7 +1,6 @@
1.text 1.text
2 2
3/* Originally gcc generated, modified by hand 3/*
4 *
5 * This may not use any stack, nor any variable that is not "NoSave": 4 * This may not use any stack, nor any variable that is not "NoSave":
6 * 5 *
7 * It's rewriting one kernel image with another. What is the stack in the "old" 6 * It's rewriting one kernel image with another. What is the stack in the "old"
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index aeb9a4d7681e..1deb3244b99b 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -1,7 +1,12 @@
1/* Copyright 2004,2005 Pavel Machek <pavel@suse.cz>, Andi Kleen <ak@suse.de>, Rafael J. Wysocki <rjw@sisk.pl> 1/*
2 * Hibernation support for x86-64
2 * 3 *
3 * Distribute under GPLv2. 4 * Distribute under GPLv2.
4 * 5 *
6 * Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
7 * Copyright 2005 Andi Kleen <ak@suse.de>
8 * Copyright 2004 Pavel Machek <pavel@suse.cz>
9 *
5 * swsusp_arch_resume must not use any stack or any nonlocal variables while 10 * swsusp_arch_resume must not use any stack or any nonlocal variables while
6 * copying pages: 11 * copying pages:
7 * 12 *
@@ -9,7 +14,7 @@
9 * image could very well be a data page in the "new" image, and overwriting 14 * image could very well be a data page in the "new" image, and overwriting
10 * your own stack under you is a bad idea. 15 * your own stack under you is a bad idea.
11 */ 16 */
12 17
13 .text 18 .text
14#include <linux/linkage.h> 19#include <linux/linkage.h>
15#include <asm/segment.h> 20#include <asm/segment.h>
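The constraint documented in that header is what forces the copy loop into hand-written asm: while pages are being rewritten, the old kernel's stack is itself a restore target. Functionally the loop just walks the list of saved pages and copies each back to its original frame. A C rendering of that walk follows; struct pbe and its field names follow the swsusp convention of the time, but this toy runs in user space and cannot honor the no-stack rule.

#include <stdio.h>
#include <string.h>

/* Each entry pairs a safe copy of a page with the address it must be
 * restored to, swsusp-style. */
struct pbe {
        void *address;          /* safe copy of the page */
        void *orig_address;     /* where it lived before hibernation */
        struct pbe *next;
};

static char saved[2][16] = { "page zero", "page one" };
static char live[2][16];        /* stands in for the original frames */

int main(void)
{
        struct pbe second = { saved[1], live[1], NULL };
        struct pbe first = { saved[0], live[0], &second };
        struct pbe *p;

        for (p = &first; p; p = p->next)
                memcpy(p->orig_address, p->address, sizeof(saved[0]));

        printf("restored: \"%s\", \"%s\"\n", live[0], live[1]);
        return 0;
}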
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 45aa771e73a9..0144395448ae 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,7 +58,7 @@
58 58
59xmaddr_t arbitrary_virt_to_machine(unsigned long address) 59xmaddr_t arbitrary_virt_to_machine(unsigned long address)
60{ 60{
61 int level; 61 unsigned int level;
62 pte_t *pte = lookup_address(address, &level); 62 pte_t *pte = lookup_address(address, &level);
63 unsigned offset = address & PAGE_MASK; 63 unsigned offset = address & PAGE_MASK;
64 64
@@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr)
71{ 71{
72 pte_t *pte, ptev; 72 pte_t *pte, ptev;
73 unsigned long address = (unsigned long)vaddr; 73 unsigned long address = (unsigned long)vaddr;
74 int level; 74 unsigned int level;
75 75
76 pte = lookup_address(address, &level); 76 pte = lookup_address(address, &level);
77 BUG_ON(pte == NULL); 77 BUG_ON(pte == NULL);
@@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr)
86{ 86{
87 pte_t *pte, ptev; 87 pte_t *pte, ptev;
88 unsigned long address = (unsigned long)vaddr; 88 unsigned long address = (unsigned long)vaddr;
89 int level; 89 unsigned int level;
90 90
91 pte = lookup_address(address, &level); 91 pte = lookup_address(address, &level);
92 BUG_ON(pte == NULL); 92 BUG_ON(pte == NULL);
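The three xen/mmu.c hunks above exist because lookup_address() now takes an unsigned int * for its level out-parameter, so an int * caller no longer matches. The mismatch is a compile-time pointer-type error even though int and unsigned int share a representation; a minimal stand-in for the changed signature:

#include <stdio.h>

/* lookup_level() models the new out-parameter type only; it is not the
 * kernel's lookup_address(). */
static void lookup_level(unsigned int *level)
{
        *level = 3;     /* e.g. "mapped at the pte level" */
}

int main(void)
{
        unsigned int level;     /* "int level;" would no longer compile cleanly */

        lookup_level(&level);
        printf("level=%u\n", level);
        return 0;
}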
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b3721fd6877b..c39e1a5aa241 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -217,17 +217,17 @@ unsigned long long xen_sched_clock(void)
217/* Get the CPU speed from Xen */ 217/* Get the CPU speed from Xen */
218unsigned long xen_cpu_khz(void) 218unsigned long xen_cpu_khz(void)
219{ 219{
220 u64 cpu_khz = 1000000ULL << 32; 220 u64 xen_khz = 1000000ULL << 32;
221 const struct vcpu_time_info *info = 221 const struct vcpu_time_info *info =
222 &HYPERVISOR_shared_info->vcpu_info[0].time; 222 &HYPERVISOR_shared_info->vcpu_info[0].time;
223 223
224 do_div(cpu_khz, info->tsc_to_system_mul); 224 do_div(xen_khz, info->tsc_to_system_mul);
225 if (info->tsc_shift < 0) 225 if (info->tsc_shift < 0)
226 cpu_khz <<= -info->tsc_shift; 226 xen_khz <<= -info->tsc_shift;
227 else 227 else
228 cpu_khz >>= info->tsc_shift; 228 xen_khz >>= info->tsc_shift;
229 229
230 return cpu_khz; 230 return xen_khz;
231} 231}
232 232
233/* 233/*