aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-10 02:29:57 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-10 02:29:57 -0500
commit0b6ca82af83a79f3d1001c8a0701ed34ac38126e (patch)
treedef8eb112c513b21e826e370f2f34249e97914eb
parentbfc1de0c40a26c6daa46c297e28138aecb4c5664 (diff)
parentfac84939609a683503947f41eb93e1917d026263 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86
* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (32 commits) x86: cpa, strict range check in try_preserve_large_page() x86: cpa, enable CONFIG_DEBUG_PAGEALLOC on 64-bit x86: cpa, use page pool x86: introduce page pool in cpa x86: DEBUG_PAGEALLOC: enable after mem_init() brk: help text typo fix lguest: accept guest _PAGE_PWT page table entries x86 PM: update stale comments x86 PM: consolidate suspend and hibernation code x86 PM: rename 32-bit files in arch/x86/power x86 PM: move 64-bit hibernation files to arch/x86/power x86: trivial printk optimizations x86: fix early_ioremap pagetable ops x86: construct 32-bit boot time page tables in native format. x86, core: remove CONFIG_FORCED_INLINING x86: avoid unused variable warning in mm/init_64.c x86: fixup more paravirt fallout brk: document randomize_va_space and CONFIG_COMPAT_BRK (was Re: x86: fix sparse warnings in acpi/bus.c x86: fix sparse warning in topology.c ...
-rw-r--r--Documentation/feature-removal-schedule.txt9
-rw-r--r--Documentation/sysctl/kernel.txt29
-rw-r--r--arch/x86/Kconfig.debug6
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/boot/printf.c24
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c2
-rw-r--r--arch/x86/kernel/entry_32.S15
-rw-r--r--arch/x86/kernel/entry_64.S18
-rw-r--r--arch/x86/kernel/geode_32.c5
-rw-r--r--arch/x86/kernel/head_32.S151
-rw-r--r--arch/x86/kernel/mfgpt_32.c123
-rw-r--r--arch/x86/kernel/setup_32.c4
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/mm/init_32.c74
-rw-r--r--arch/x86/mm/init_64.c8
-rw-r--r--arch/x86/mm/ioremap.c55
-rw-r--r--arch/x86/mm/pageattr.c140
-rw-r--r--arch/x86/power/Makefile4
-rw-r--r--arch/x86/power/cpu_32.c (renamed from arch/x86/power/cpu.c)2
-rw-r--r--arch/x86/power/cpu_64.c (renamed from arch/x86/kernel/suspend_64.c)160
-rw-r--r--arch/x86/power/hibernate_32.c (renamed from arch/x86/power/suspend.c)6
-rw-r--r--arch/x86/power/hibernate_64.c169
-rw-r--r--arch/x86/power/hibernate_asm_32.S (renamed from arch/x86/power/swsusp.S)3
-rw-r--r--arch/x86/power/hibernate_asm_64.S (renamed from arch/x86/kernel/suspend_asm_64.S)9
-rw-r--r--arch/x86/xen/mmu.c6
-rw-r--r--arch/x86/xen/time.c10
-rw-r--r--drivers/acpi/bus.c7
-rw-r--r--drivers/lguest/page_tables.c4
-rw-r--r--include/asm-x86/acpi.h4
-rw-r--r--include/asm-x86/cacheflush.h2
-rw-r--r--include/asm-x86/geode.h9
-rw-r--r--include/asm-x86/page_32.h1
-rw-r--r--include/asm-x86/pgtable.h2
-rw-r--r--include/asm-x86/pgtable_32.h4
-rw-r--r--include/linux/compiler-gcc4.h9
-rw-r--r--init/Kconfig2
-rw-r--r--init/main.c2
-rw-r--r--lib/Kconfig.debug14
-rw-r--r--lib/vsprintf.c49
42 files changed, 684 insertions, 467 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 2039f47f2e65..4d3aa519eadf 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -111,15 +111,6 @@ Who: Christoph Hellwig <hch@lst.de>
111 111
112--------------------------- 112---------------------------
113 113
114What: CONFIG_FORCED_INLINING
115When: June 2006
116Why: Config option is there to see if gcc is good enough. (in january
117 2006). If it is, the behavior should just be the default. If it's not,
118 the option should just go away entirely.
119Who: Arjan van de Ven
120
121---------------------------
122
123What: eepro100 network driver 114What: eepro100 network driver
124When: January 2007 115When: January 2007
125Why: replaced by the e100 driver 116Why: replaced by the e100 driver
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 8984a5396271..dc8801d4e944 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -41,6 +41,7 @@ show up in /proc/sys/kernel:
41- pid_max 41- pid_max
42- powersave-nap [ PPC only ] 42- powersave-nap [ PPC only ]
43- printk 43- printk
44- randomize_va_space
44- real-root-dev ==> Documentation/initrd.txt 45- real-root-dev ==> Documentation/initrd.txt
45- reboot-cmd [ SPARC only ] 46- reboot-cmd [ SPARC only ]
46- rtsig-max 47- rtsig-max
@@ -280,6 +281,34 @@ send before ratelimiting kicks in.
280 281
281============================================================== 282==============================================================
282 283
284randomize_va_space:
285
286This option can be used to select the type of process address
287space randomization that is used in the system, for architectures
288that support this feature.
289
2900 - Turn the process address space randomization off by default.
291
2921 - Make the addresses of mmap base, stack and VDSO page randomized.
293 This, among other things, implies that shared libraries will be
294 loaded to random addresses. Also for PIE-linked binaries, the location
295 of code start is randomized.
296
297 With heap randomization, the situation is a little bit more
298 complicated.
299 There are a few legacy applications out there (such as some ancient
300 versions of libc.so.5 from 1996) that assume that brk area starts
301 just after the end of the code+bss. These applications break when
302 start of the brk area is randomized. There are however no known
303 non-legacy applications that would be broken this way, so for most
304 systems it is safe to choose full randomization. However there is
305 a CONFIG_COMPAT_BRK option for systems with ancient and/or broken
306 binaries, that makes heap non-randomized, but keeps all other
307 parts of process address space randomized if randomize_va_space
308 sysctl is turned on.
309
310==============================================================
311
283reboot-cmd: (Sparc only) 312reboot-cmd: (Sparc only)
284 313
285??? This seems to be a way to give an argument to the Sparc 314??? This seems to be a way to give an argument to the Sparc
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fa555148823d..864affc9a7b0 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -34,13 +34,9 @@ config DEBUG_STACK_USAGE
34 34
35 This option will slow down process creation somewhat. 35 This option will slow down process creation somewhat.
36 36
37comment "Page alloc debug is incompatible with Software Suspend on i386"
38 depends on DEBUG_KERNEL && HIBERNATION
39 depends on X86_32
40
41config DEBUG_PAGEALLOC 37config DEBUG_PAGEALLOC
42 bool "Debug page memory allocations" 38 bool "Debug page memory allocations"
43 depends on DEBUG_KERNEL && X86_32 39 depends on DEBUG_KERNEL
44 help 40 help
45 Unmap pages from the kernel linear mapping after free_pages(). 41 Unmap pages from the kernel linear mapping after free_pages().
46 This results in a large slowdown, but helps to find certain types 42 This results in a large slowdown, but helps to find certain types
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 364865b1b08d..204af43535c5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -191,8 +191,10 @@ drivers-$(CONFIG_PCI) += arch/x86/pci/
191# must be linked after kernel/ 191# must be linked after kernel/
192drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ 192drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
193 193
194ifeq ($(CONFIG_X86_32),y) 194# suspend and hibernation support
195drivers-$(CONFIG_PM) += arch/x86/power/ 195drivers-$(CONFIG_PM) += arch/x86/power/
196
197ifeq ($(CONFIG_X86_32),y)
196drivers-$(CONFIG_FB) += arch/x86/video/ 198drivers-$(CONFIG_FB) += arch/x86/video/
197endif 199endif
198 200
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 1a09f9309d3c..7e7e890699be 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -33,8 +33,8 @@ static int skip_atoi(const char **s)
33#define PLUS 4 /* show plus */ 33#define PLUS 4 /* show plus */
34#define SPACE 8 /* space if plus */ 34#define SPACE 8 /* space if plus */
35#define LEFT 16 /* left justified */ 35#define LEFT 16 /* left justified */
36#define SPECIAL 32 /* 0x */ 36#define SMALL 32 /* Must be 32 == 0x20 */
37#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ 37#define SPECIAL 64 /* 0x */
38 38
39#define do_div(n,base) ({ \ 39#define do_div(n,base) ({ \
40int __res; \ 40int __res; \
@@ -45,12 +45,16 @@ __res; })
45static char *number(char *str, long num, int base, int size, int precision, 45static char *number(char *str, long num, int base, int size, int precision,
46 int type) 46 int type)
47{ 47{
48 char c, sign, tmp[66]; 48 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
49 const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 49 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
50
51 char tmp[66];
52 char c, sign, locase;
50 int i; 53 int i;
51 54
52 if (type & LARGE) 55 /* locase = 0 or 0x20. ORing digits or letters with 'locase'
53 digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 56 * produces same digits or (maybe lowercased) letters */
57 locase = (type & SMALL);
54 if (type & LEFT) 58 if (type & LEFT)
55 type &= ~ZEROPAD; 59 type &= ~ZEROPAD;
56 if (base < 2 || base > 36) 60 if (base < 2 || base > 36)
@@ -81,7 +85,7 @@ static char *number(char *str, long num, int base, int size, int precision,
81 tmp[i++] = '0'; 85 tmp[i++] = '0';
82 else 86 else
83 while (num != 0) 87 while (num != 0)
84 tmp[i++] = digits[do_div(num, base)]; 88 tmp[i++] = (digits[do_div(num, base)] | locase);
85 if (i > precision) 89 if (i > precision)
86 precision = i; 90 precision = i;
87 size -= precision; 91 size -= precision;
@@ -95,7 +99,7 @@ static char *number(char *str, long num, int base, int size, int precision,
95 *str++ = '0'; 99 *str++ = '0';
96 else if (base == 16) { 100 else if (base == 16) {
97 *str++ = '0'; 101 *str++ = '0';
98 *str++ = digits[33]; 102 *str++ = ('X' | locase);
99 } 103 }
100 } 104 }
101 if (!(type & LEFT)) 105 if (!(type & LEFT))
@@ -244,9 +248,9 @@ int vsprintf(char *buf, const char *fmt, va_list args)
244 base = 8; 248 base = 8;
245 break; 249 break;
246 250
247 case 'X':
248 flags |= LARGE;
249 case 'x': 251 case 'x':
252 flags |= SMALL;
253 case 'X':
250 base = 16; 254 base = 16;
251 break; 255 break;
252 256
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 77562e7cdab6..3df340b54e57 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
1421# CONFIG_DEBUG_VM is not set 1421# CONFIG_DEBUG_VM is not set
1422# CONFIG_DEBUG_LIST is not set 1422# CONFIG_DEBUG_LIST is not set
1423# CONFIG_FRAME_POINTER is not set 1423# CONFIG_FRAME_POINTER is not set
1424# CONFIG_FORCED_INLINING is not set
1425# CONFIG_RCU_TORTURE_TEST is not set 1424# CONFIG_RCU_TORTURE_TEST is not set
1426# CONFIG_LKDTM is not set 1425# CONFIG_LKDTM is not set
1427# CONFIG_FAULT_INJECTION is not set 1426# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9e2b0ef851de..eef98cb00c62 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
1346# CONFIG_DEBUG_VM is not set 1346# CONFIG_DEBUG_VM is not set
1347# CONFIG_DEBUG_LIST is not set 1347# CONFIG_DEBUG_LIST is not set
1348# CONFIG_FRAME_POINTER is not set 1348# CONFIG_FRAME_POINTER is not set
1349# CONFIG_FORCED_INLINING is not set
1350# CONFIG_RCU_TORTURE_TEST is not set 1349# CONFIG_RCU_TORTURE_TEST is not set
1351# CONFIG_LKDTM is not set 1350# CONFIG_LKDTM is not set
1352# CONFIG_FAULT_INJECTION is not set 1351# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 21dc1a061bf1..76ec0f8f138a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -84,8 +84,6 @@ ifeq ($(CONFIG_X86_64),y)
84 obj-y += genapic_64.o genapic_flat_64.o 84 obj-y += genapic_64.o genapic_flat_64.o
85 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 85 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
86 obj-$(CONFIG_AUDIT) += audit_64.o 86 obj-$(CONFIG_AUDIT) += audit_64.o
87 obj-$(CONFIG_PM) += suspend_64.o
88 obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o
89 87
90 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 88 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
91 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o 89 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 24885be5c48c..9b7e01daa1ca 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -118,7 +118,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
118 118
119static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) 119static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
120{ 120{
121 return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); 121 sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
122} 122}
123 123
124/* Mutex protecting device creation against CPU hotplug */ 124/* Mutex protecting device creation against CPU hotplug */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index be5c31d04884..824e21b80aad 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,8 @@ restore_nocheck_notrace:
409 RESTORE_REGS 409 RESTORE_REGS
410 addl $4, %esp # skip orig_eax/error_code 410 addl $4, %esp # skip orig_eax/error_code
411 CFI_ADJUST_CFA_OFFSET -4 411 CFI_ADJUST_CFA_OFFSET -4
4121: INTERRUPT_RETURN 412ENTRY(irq_return)
413 INTERRUPT_RETURN
413.section .fixup,"ax" 414.section .fixup,"ax"
414iret_exc: 415iret_exc:
415 pushl $0 # no error code 416 pushl $0 # no error code
@@ -418,7 +419,7 @@ iret_exc:
418.previous 419.previous
419.section __ex_table,"a" 420.section __ex_table,"a"
420 .align 4 421 .align 4
421 .long 1b,iret_exc 422 .long irq_return,iret_exc
422.previous 423.previous
423 424
424 CFI_RESTORE_STATE 425 CFI_RESTORE_STATE
@@ -865,20 +866,16 @@ nmi_espfix_stack:
865 RESTORE_REGS 866 RESTORE_REGS
866 lss 12+4(%esp), %esp # back to espfix stack 867 lss 12+4(%esp), %esp # back to espfix stack
867 CFI_ADJUST_CFA_OFFSET -24 868 CFI_ADJUST_CFA_OFFSET -24
8681: INTERRUPT_RETURN 869 jmp irq_return
869 CFI_ENDPROC 870 CFI_ENDPROC
870.section __ex_table,"a"
871 .align 4
872 .long 1b,iret_exc
873.previous
874KPROBE_END(nmi) 871KPROBE_END(nmi)
875 872
876#ifdef CONFIG_PARAVIRT 873#ifdef CONFIG_PARAVIRT
877ENTRY(native_iret) 874ENTRY(native_iret)
8781: iret 875 iret
879.section __ex_table,"a" 876.section __ex_table,"a"
880 .align 4 877 .align 4
881 .long 1b,iret_exc 878 .long native_iret, iret_exc
882.previous 879.previous
883END(native_iret) 880END(native_iret)
884 881
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7341e81941c..6be39a387c5a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -581,16 +581,24 @@ retint_restore_args: /* return to kernel space */
581 */ 581 */
582 TRACE_IRQS_IRETQ 582 TRACE_IRQS_IRETQ
583restore_args: 583restore_args:
584 RESTORE_ARGS 0,8,0 584 RESTORE_ARGS 0,8,0
585#ifdef CONFIG_PARAVIRT 585
586ENTRY(irq_return)
586 INTERRUPT_RETURN 587 INTERRUPT_RETURN
587#endif 588
589 .section __ex_table, "a"
590 .quad irq_return, bad_iret
591 .previous
592
593#ifdef CONFIG_PARAVIRT
588ENTRY(native_iret) 594ENTRY(native_iret)
589 iretq 595 iretq
590 596
591 .section __ex_table,"a" 597 .section __ex_table,"a"
592 .quad native_iret, bad_iret 598 .quad native_iret, bad_iret
593 .previous 599 .previous
600#endif
601
594 .section .fixup,"ax" 602 .section .fixup,"ax"
595bad_iret: 603bad_iret:
596 /* 604 /*
@@ -804,7 +812,7 @@ paranoid_swapgs\trace:
804 SWAPGS_UNSAFE_STACK 812 SWAPGS_UNSAFE_STACK
805paranoid_restore\trace: 813paranoid_restore\trace:
806 RESTORE_ALL 8 814 RESTORE_ALL 8
807 INTERRUPT_RETURN 815 jmp irq_return
808paranoid_userspace\trace: 816paranoid_userspace\trace:
809 GET_THREAD_INFO(%rcx) 817 GET_THREAD_INFO(%rcx)
810 movl threadinfo_flags(%rcx),%ebx 818 movl threadinfo_flags(%rcx),%ebx
@@ -919,7 +927,7 @@ error_kernelspace:
919 iret run with kernel gs again, so don't set the user space flag. 927 iret run with kernel gs again, so don't set the user space flag.
920 B stepping K8s sometimes report an truncated RIP for IRET 928 B stepping K8s sometimes report an truncated RIP for IRET
921 exceptions returning to compat mode. Check for these here too. */ 929 exceptions returning to compat mode. Check for these here too. */
922 leaq native_iret(%rip),%rbp 930 leaq irq_return(%rip),%rbp
923 cmpq %rbp,RIP(%rsp) 931 cmpq %rbp,RIP(%rsp)
924 je error_swapgs 932 je error_swapgs
925 movl %ebp,%ebp /* zero extend */ 933 movl %ebp,%ebp /* zero extend */
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index 9c7f7d395968..9dad6ca6cd70 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -163,14 +163,11 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
163 163
164static int __init geode_southbridge_init(void) 164static int __init geode_southbridge_init(void)
165{ 165{
166 int timers;
167
168 if (!is_geode()) 166 if (!is_geode())
169 return -ENODEV; 167 return -ENODEV;
170 168
171 init_lbars(); 169 init_lbars();
172 timers = geode_mfgpt_detect(); 170 (void) mfgpt_timer_setup();
173 printk(KERN_INFO "geode: %d MFGPT timers available.\n", timers);
174 return 0; 171 return 0;
175} 172}
176 173
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5d8c5730686b..74ef4a41f224 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,10 @@
19#include <asm/thread_info.h> 19#include <asm/thread_info.h>
20#include <asm/asm-offsets.h> 20#include <asm/asm-offsets.h>
21#include <asm/setup.h> 21#include <asm/setup.h>
22#include <asm/processor-flags.h>
23
24/* Physical address */
25#define pa(X) ((X) - __PAGE_OFFSET)
22 26
23/* 27/*
24 * References to members of the new_cpu_data structure. 28 * References to members of the new_cpu_data structure.
@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
80 */ 84 */
81.section .text.head,"ax",@progbits 85.section .text.head,"ax",@progbits
82ENTRY(startup_32) 86ENTRY(startup_32)
83 /* check to see if KEEP_SEGMENTS flag is meaningful */
84 cmpw $0x207, BP_version(%esi)
85 jb 1f
86
87 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 87 /* test KEEP_SEGMENTS flag to see if the bootloader is asking
88 us to not reload segments */ 88 us to not reload segments */
89 testb $(1<<6), BP_loadflags(%esi) 89 testb $(1<<6), BP_loadflags(%esi)
@@ -92,7 +92,7 @@ ENTRY(startup_32)
92/* 92/*
93 * Set segments to known values. 93 * Set segments to known values.
94 */ 94 */
951: lgdt boot_gdt_descr - __PAGE_OFFSET 95 lgdt pa(boot_gdt_descr)
96 movl $(__BOOT_DS),%eax 96 movl $(__BOOT_DS),%eax
97 movl %eax,%ds 97 movl %eax,%ds
98 movl %eax,%es 98 movl %eax,%es
@@ -105,8 +105,8 @@ ENTRY(startup_32)
105 */ 105 */
106 cld 106 cld
107 xorl %eax,%eax 107 xorl %eax,%eax
108 movl $__bss_start - __PAGE_OFFSET,%edi 108 movl $pa(__bss_start),%edi
109 movl $__bss_stop - __PAGE_OFFSET,%ecx 109 movl $pa(__bss_stop),%ecx
110 subl %edi,%ecx 110 subl %edi,%ecx
111 shrl $2,%ecx 111 shrl $2,%ecx
112 rep ; stosl 112 rep ; stosl
@@ -118,31 +118,32 @@ ENTRY(startup_32)
118 * (kexec on panic case). Hence copy out the parameters before initializing 118 * (kexec on panic case). Hence copy out the parameters before initializing
119 * page tables. 119 * page tables.
120 */ 120 */
121 movl $(boot_params - __PAGE_OFFSET),%edi 121 movl $pa(boot_params),%edi
122 movl $(PARAM_SIZE/4),%ecx 122 movl $(PARAM_SIZE/4),%ecx
123 cld 123 cld
124 rep 124 rep
125 movsl 125 movsl
126 movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi 126 movl pa(boot_params) + NEW_CL_POINTER,%esi
127 andl %esi,%esi 127 andl %esi,%esi
128 jz 1f # No comand line 128 jz 1f # No comand line
129 movl $(boot_command_line - __PAGE_OFFSET),%edi 129 movl $pa(boot_command_line),%edi
130 movl $(COMMAND_LINE_SIZE/4),%ecx 130 movl $(COMMAND_LINE_SIZE/4),%ecx
131 rep 131 rep
132 movsl 132 movsl
1331: 1331:
134 134
135#ifdef CONFIG_PARAVIRT 135#ifdef CONFIG_PARAVIRT
136 cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET) 136 /* This is can only trip for a broken bootloader... */
137 cmpw $0x207, pa(boot_params + BP_version)
137 jb default_entry 138 jb default_entry
138 139
139 /* Paravirt-compatible boot parameters. Look to see what architecture 140 /* Paravirt-compatible boot parameters. Look to see what architecture
140 we're booting under. */ 141 we're booting under. */
141 movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax 142 movl pa(boot_params + BP_hardware_subarch), %eax
142 cmpl $num_subarch_entries, %eax 143 cmpl $num_subarch_entries, %eax
143 jae bad_subarch 144 jae bad_subarch
144 145
145 movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax 146 movl pa(subarch_entries)(,%eax,4), %eax
146 subl $__PAGE_OFFSET, %eax 147 subl $__PAGE_OFFSET, %eax
147 jmp *%eax 148 jmp *%eax
148 149
@@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4
170 * Mappings are created both at virtual address 0 (identity mapping) 171 * Mappings are created both at virtual address 0 (identity mapping)
171 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. 172 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
172 * 173 *
173 * Warning: don't use %esi or the stack in this code. However, %esp 174 * Note that the stack is not yet set up!
174 * can be used as a GPR if you really need it...
175 */ 175 */
176page_pde_offset = (__PAGE_OFFSET >> 20); 176#define PTE_ATTR 0x007 /* PRESENT+RW+USER */
177#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
178#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */
177 179
178default_entry: 180default_entry:
179 movl $(pg0 - __PAGE_OFFSET), %edi 181#ifdef CONFIG_X86_PAE
180 movl $(swapper_pg_dir - __PAGE_OFFSET), %edx 182
181 movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ 183 /*
184 * In PAE mode swapper_pg_dir is statically defined to contain enough
185 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
186 * entries). The identity mapping is handled by pointing two PGD
187 * entries to the first kernel PMD.
188 *
189 * Note the upper half of each PMD or PTE are always zero at
190 * this stage.
191 */
192
193#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */
194
195 xorl %ebx,%ebx /* %ebx is kept at zero */
196
197 movl $pa(pg0), %edi
198 movl $pa(swapper_pg_pmd), %edx
199 movl $PTE_ATTR, %eax
20010:
201 leal PDE_ATTR(%edi),%ecx /* Create PMD entry */
202 movl %ecx,(%edx) /* Store PMD entry */
203 /* Upper half already zero */
204 addl $8,%edx
205 movl $512,%ecx
20611:
207 stosl
208 xchgl %eax,%ebx
209 stosl
210 xchgl %eax,%ebx
211 addl $0x1000,%eax
212 loop 11b
213
214 /*
215 * End condition: we must map up to and including INIT_MAP_BEYOND_END
216 * bytes beyond the end of our own page tables.
217 */
218 leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
219 cmpl %ebp,%eax
220 jb 10b
2211:
222 movl %edi,pa(init_pg_tables_end)
223
224 /* Do early initialization of the fixmap area */
225 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
226 movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
227#else /* Not PAE */
228
229page_pde_offset = (__PAGE_OFFSET >> 20);
230
231 movl $pa(pg0), %edi
232 movl $pa(swapper_pg_dir), %edx
233 movl $PTE_ATTR, %eax
18210: 23410:
183 leal 0x007(%edi),%ecx /* Create PDE entry */ 235 leal PDE_ATTR(%edi),%ecx /* Create PDE entry */
184 movl %ecx,(%edx) /* Store identity PDE entry */ 236 movl %ecx,(%edx) /* Store identity PDE entry */
185 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ 237 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
186 addl $4,%edx 238 addl $4,%edx
@@ -189,19 +241,20 @@ default_entry:
189 stosl 241 stosl
190 addl $0x1000,%eax 242 addl $0x1000,%eax
191 loop 11b 243 loop 11b
192 /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ 244 /*
193 /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ 245 * End condition: we must map up to and including INIT_MAP_BEYOND_END
194 leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp 246 * bytes beyond the end of our own page tables; the +0x007 is
247 * the attribute bits
248 */
249 leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
195 cmpl %ebp,%eax 250 cmpl %ebp,%eax
196 jb 10b 251 jb 10b
197 movl %edi,(init_pg_tables_end - __PAGE_OFFSET) 252 movl %edi,pa(init_pg_tables_end)
198
199 /* Do an early initialization of the fixmap area */
200 movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
201 movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
202 addl $0x67, %eax /* 0x67 == _PAGE_TABLE */
203 movl %eax, 4092(%edx)
204 253
254 /* Do early initialization of the fixmap area */
255 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
256 movl %eax,pa(swapper_pg_dir+0xffc)
257#endif
205 jmp 3f 258 jmp 3f
206/* 259/*
207 * Non-boot CPU entry point; entered from trampoline.S 260 * Non-boot CPU entry point; entered from trampoline.S
@@ -241,7 +294,7 @@ ENTRY(startup_32_smp)
241 * NOTE! We have to correct for the fact that we're 294 * NOTE! We have to correct for the fact that we're
242 * not yet offset PAGE_OFFSET.. 295 * not yet offset PAGE_OFFSET..
243 */ 296 */
244#define cr4_bits mmu_cr4_features-__PAGE_OFFSET 297#define cr4_bits pa(mmu_cr4_features)
245 movl cr4_bits,%edx 298 movl cr4_bits,%edx
246 andl %edx,%edx 299 andl %edx,%edx
247 jz 6f 300 jz 6f
@@ -276,10 +329,10 @@ ENTRY(startup_32_smp)
276/* 329/*
277 * Enable paging 330 * Enable paging
278 */ 331 */
279 movl $swapper_pg_dir-__PAGE_OFFSET,%eax 332 movl $pa(swapper_pg_dir),%eax
280 movl %eax,%cr3 /* set the page table pointer.. */ 333 movl %eax,%cr3 /* set the page table pointer.. */
281 movl %cr0,%eax 334 movl %cr0,%eax
282 orl $0x80000000,%eax 335 orl $X86_CR0_PG,%eax
283 movl %eax,%cr0 /* ..and set paging (PG) bit */ 336 movl %eax,%cr0 /* ..and set paging (PG) bit */
284 ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ 337 ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
2851: 3381:
@@ -552,16 +605,44 @@ ENTRY(_stext)
552 */ 605 */
553.section ".bss.page_aligned","wa" 606.section ".bss.page_aligned","wa"
554 .align PAGE_SIZE_asm 607 .align PAGE_SIZE_asm
608#ifdef CONFIG_X86_PAE
609ENTRY(swapper_pg_pmd)
610 .fill 1024*KPMDS,4,0
611#else
555ENTRY(swapper_pg_dir) 612ENTRY(swapper_pg_dir)
556 .fill 1024,4,0 613 .fill 1024,4,0
557ENTRY(swapper_pg_pmd) 614#endif
615ENTRY(swapper_pg_fixmap)
558 .fill 1024,4,0 616 .fill 1024,4,0
559ENTRY(empty_zero_page) 617ENTRY(empty_zero_page)
560 .fill 4096,1,0 618 .fill 4096,1,0
561
562/* 619/*
563 * This starts the data section. 620 * This starts the data section.
564 */ 621 */
622#ifdef CONFIG_X86_PAE
623.section ".data.page_aligned","wa"
624 /* Page-aligned for the benefit of paravirt? */
625 .align PAGE_SIZE_asm
626ENTRY(swapper_pg_dir)
627 .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */
628# if KPMDS == 3
629 .long pa(swapper_pg_pmd+PGD_ATTR),0
630 .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
631 .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
632# elif KPMDS == 2
633 .long 0,0
634 .long pa(swapper_pg_pmd+PGD_ATTR),0
635 .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
636# elif KPMDS == 1
637 .long 0,0
638 .long 0,0
639 .long pa(swapper_pg_pmd+PGD_ATTR),0
640# else
641# error "Kernel PMDs should be 1, 2 or 3"
642# endif
643 .align PAGE_SIZE_asm /* needs to be page-sized too */
644#endif
645
565.data 646.data
566ENTRY(stack_start) 647ENTRY(stack_start)
567 .long init_thread_union+THREAD_SIZE 648 .long init_thread_union+THREAD_SIZE
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 219f86eb6123..027fc067b399 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -12,48 +12,37 @@
12 */ 12 */
13 13
14/* 14/*
15 * We are using the 32Khz input clock - its the only one that has the 15 * We are using the 32.768kHz input clock - it's the only one that has the
16 * ranges we find desirable. The following table lists the suitable 16 * ranges we find desirable. The following table lists the suitable
17 * divisors and the associated hz, minimum interval 17 * divisors and the associated Hz, minimum interval and the maximum interval:
18 * and the maximum interval:
19 * 18 *
20 * Divisor Hz Min Delta (S) Max Delta (S) 19 * Divisor Hz Min Delta (s) Max Delta (s)
21 * 1 32000 .0005 2.048 20 * 1 32768 .00048828125 2.000
22 * 2 16000 .001 4.096 21 * 2 16384 .0009765625 4.000
23 * 4 8000 .002 8.192 22 * 4 8192 .001953125 8.000
24 * 8 4000 .004 16.384 23 * 8 4096 .00390625 16.000
25 * 16 2000 .008 32.768 24 * 16 2048 .0078125 32.000
26 * 32 1000 .016 65.536 25 * 32 1024 .015625 64.000
27 * 64 500 .032 131.072 26 * 64 512 .03125 128.000
28 * 128 250 .064 262.144 27 * 128 256 .0625 256.000
29 * 256 125 .128 524.288 28 * 256 128 .125 512.000
30 */ 29 */
31 30
32#include <linux/kernel.h> 31#include <linux/kernel.h>
33#include <linux/interrupt.h> 32#include <linux/interrupt.h>
34#include <linux/module.h>
35#include <asm/geode.h> 33#include <asm/geode.h>
36 34
37#define F_AVAIL 0x01
38
39static struct mfgpt_timer_t { 35static struct mfgpt_timer_t {
40 int flags; 36 unsigned int avail:1;
41 struct module *owner;
42} mfgpt_timers[MFGPT_MAX_TIMERS]; 37} mfgpt_timers[MFGPT_MAX_TIMERS];
43 38
44/* Selected from the table above */ 39/* Selected from the table above */
45 40
46#define MFGPT_DIVISOR 16 41#define MFGPT_DIVISOR 16
47#define MFGPT_SCALE 4 /* divisor = 2^(scale) */ 42#define MFGPT_SCALE 4 /* divisor = 2^(scale) */
48#define MFGPT_HZ (32000 / MFGPT_DIVISOR) 43#define MFGPT_HZ (32768 / MFGPT_DIVISOR)
49#define MFGPT_PERIODIC (MFGPT_HZ / HZ) 44#define MFGPT_PERIODIC (MFGPT_HZ / HZ)
50 45
51#ifdef CONFIG_GEODE_MFGPT_TIMER
52static int __init mfgpt_timer_setup(void);
53#else
54#define mfgpt_timer_setup() (0)
55#endif
56
57/* Allow for disabling of MFGPTs */ 46/* Allow for disabling of MFGPTs */
58static int disable; 47static int disable;
59static int __init mfgpt_disable(char *s) 48static int __init mfgpt_disable(char *s)
@@ -85,28 +74,37 @@ __setup("mfgptfix", mfgpt_fix);
85 * In other cases (such as with VSAless OpenFirmware), the system firmware 74 * In other cases (such as with VSAless OpenFirmware), the system firmware
86 * leaves timers available for us to use. 75 * leaves timers available for us to use.
87 */ 76 */
88int __init geode_mfgpt_detect(void) 77
78
79static int timers = -1;
80
81static void geode_mfgpt_detect(void)
89{ 82{
90 int count = 0, i; 83 int i;
91 u16 val; 84 u16 val;
92 85
86 timers = 0;
87
93 if (disable) { 88 if (disable) {
94 printk(KERN_INFO "geode-mfgpt: Skipping MFGPT setup\n"); 89 printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
95 return 0; 90 goto done;
91 }
92
93 if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
94 printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
95 goto done;
96 } 96 }
97 97
98 for (i = 0; i < MFGPT_MAX_TIMERS; i++) { 98 for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
99 val = geode_mfgpt_read(i, MFGPT_REG_SETUP); 99 val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
100 if (!(val & MFGPT_SETUP_SETUP)) { 100 if (!(val & MFGPT_SETUP_SETUP)) {
101 mfgpt_timers[i].flags = F_AVAIL; 101 mfgpt_timers[i].avail = 1;
102 count++; 102 timers++;
103 } 103 }
104 } 104 }
105 105
106 /* set up clock event device, if desired */ 106done:
107 i = mfgpt_timer_setup(); 107 printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
108
109 return count;
110} 108}
111 109
112int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) 110int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
@@ -183,36 +181,41 @@ int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
183 return 0; 181 return 0;
184} 182}
185 183
186static int mfgpt_get(int timer, struct module *owner) 184static int mfgpt_get(int timer)
187{ 185{
188 mfgpt_timers[timer].flags &= ~F_AVAIL; 186 mfgpt_timers[timer].avail = 0;
189 mfgpt_timers[timer].owner = owner;
190 printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer); 187 printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
191 return timer; 188 return timer;
192} 189}
193 190
194int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner) 191int geode_mfgpt_alloc_timer(int timer, int domain)
195{ 192{
196 int i; 193 int i;
197 194
198 if (!geode_get_dev_base(GEODE_DEV_MFGPT)) 195 if (timers == -1) {
199 return -ENODEV; 196 /* timers haven't been detected yet */
197 geode_mfgpt_detect();
198 }
199
200 if (!timers)
201 return -1;
202
200 if (timer >= MFGPT_MAX_TIMERS) 203 if (timer >= MFGPT_MAX_TIMERS)
201 return -EIO; 204 return -1;
202 205
203 if (timer < 0) { 206 if (timer < 0) {
204 /* Try to find an available timer */ 207 /* Try to find an available timer */
205 for (i = 0; i < MFGPT_MAX_TIMERS; i++) { 208 for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
206 if (mfgpt_timers[i].flags & F_AVAIL) 209 if (mfgpt_timers[i].avail)
207 return mfgpt_get(i, owner); 210 return mfgpt_get(i);
208 211
209 if (i == 5 && domain == MFGPT_DOMAIN_WORKING) 212 if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
210 break; 213 break;
211 } 214 }
212 } else { 215 } else {
213 /* If they requested a specific timer, try to honor that */ 216 /* If they requested a specific timer, try to honor that */
214 if (mfgpt_timers[timer].flags & F_AVAIL) 217 if (mfgpt_timers[timer].avail)
215 return mfgpt_get(timer, owner); 218 return mfgpt_get(timer);
216 } 219 }
217 220
218 /* No timers available - too bad */ 221 /* No timers available - too bad */
@@ -244,10 +247,11 @@ static int __init mfgpt_setup(char *str)
244} 247}
245__setup("mfgpt_irq=", mfgpt_setup); 248__setup("mfgpt_irq=", mfgpt_setup);
246 249
247static inline void mfgpt_disable_timer(u16 clock) 250static void mfgpt_disable_timer(u16 clock)
248{ 251{
249 u16 val = geode_mfgpt_read(clock, MFGPT_REG_SETUP); 252 /* avoid races by clearing CMP1 and CMP2 unconditionally */
250 geode_mfgpt_write(clock, MFGPT_REG_SETUP, val & ~MFGPT_SETUP_CNTEN); 253 geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
254 MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
251} 255}
252 256
253static int mfgpt_next_event(unsigned long, struct clock_event_device *); 257static int mfgpt_next_event(unsigned long, struct clock_event_device *);
@@ -263,7 +267,7 @@ static struct clock_event_device mfgpt_clockevent = {
263 .shift = 32 267 .shift = 32
264}; 268};
265 269
266static inline void mfgpt_start_timer(u16 clock, u16 delta) 270static void mfgpt_start_timer(u16 delta)
267{ 271{
268 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta); 272 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
269 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); 273 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
@@ -278,21 +282,25 @@ static void mfgpt_set_mode(enum clock_event_mode mode,
278 mfgpt_disable_timer(mfgpt_event_clock); 282 mfgpt_disable_timer(mfgpt_event_clock);
279 283
280 if (mode == CLOCK_EVT_MODE_PERIODIC) 284 if (mode == CLOCK_EVT_MODE_PERIODIC)
281 mfgpt_start_timer(mfgpt_event_clock, MFGPT_PERIODIC); 285 mfgpt_start_timer(MFGPT_PERIODIC);
282 286
283 mfgpt_tick_mode = mode; 287 mfgpt_tick_mode = mode;
284} 288}
285 289
286static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt) 290static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
287{ 291{
288 mfgpt_start_timer(mfgpt_event_clock, delta); 292 mfgpt_start_timer(delta);
289 return 0; 293 return 0;
290} 294}
291 295
292/* Assume (foolishly?), that this interrupt was due to our tick */
293
294static irqreturn_t mfgpt_tick(int irq, void *dev_id) 296static irqreturn_t mfgpt_tick(int irq, void *dev_id)
295{ 297{
298 u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
299
300 /* See if the interrupt was for us */
301 if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
302 return IRQ_NONE;
303
296 /* Turn off the clock (and clear the event) */ 304 /* Turn off the clock (and clear the event) */
297 mfgpt_disable_timer(mfgpt_event_clock); 305 mfgpt_disable_timer(mfgpt_event_clock);
298 306
@@ -320,13 +328,12 @@ static struct irqaction mfgptirq = {
320 .name = "mfgpt-timer" 328 .name = "mfgpt-timer"
321}; 329};
322 330
323static int __init mfgpt_timer_setup(void) 331int __init mfgpt_timer_setup(void)
324{ 332{
325 int timer, ret; 333 int timer, ret;
326 u16 val; 334 u16 val;
327 335
328 timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING, 336 timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
329 THIS_MODULE);
330 if (timer < 0) { 337 if (timer < 0) {
331 printk(KERN_ERR 338 printk(KERN_ERR
332 "mfgpt-timer: Could not allocate a MFPGT timer\n"); 339 "mfgpt-timer: Could not allocate a MFPGT timer\n");
@@ -363,7 +370,7 @@ static int __init mfgpt_timer_setup(void)
363 &mfgpt_clockevent); 370 &mfgpt_clockevent);
364 371
365 printk(KERN_INFO 372 printk(KERN_INFO
366 "mfgpt-timer: registering the MFGT timer as a clock event.\n"); 373 "mfgpt-timer: registering the MFGPT timer as a clock event.\n");
367 clockevents_register_device(&mfgpt_clockevent); 374 clockevents_register_device(&mfgpt_clockevent);
368 375
369 return 0; 376 return 0;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index d1d8c347cc0b..691ab4cb167b 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
154struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; 154struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
155EXPORT_SYMBOL(boot_cpu_data); 155EXPORT_SYMBOL(boot_cpu_data);
156 156
157#ifndef CONFIG_X86_PAE
157unsigned long mmu_cr4_features; 158unsigned long mmu_cr4_features;
159#else
160unsigned long mmu_cr4_features = X86_CR4_PAE;
161#endif
158 162
159/* for MCA, but anyone else can use it if they want */ 163/* for MCA, but anyone else can use it if they want */
160unsigned int machine_id; 164unsigned int machine_id;
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index e6757aaa202b..a40051b71d9b 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(arch_register_cpu);
53 53
54void arch_unregister_cpu(int num) 54void arch_unregister_cpu(int num)
55{ 55{
56 return unregister_cpu(&per_cpu(cpu_devices, num).cpu); 56 unregister_cpu(&per_cpu(cpu_devices, num).cpu);
57} 57}
58EXPORT_SYMBOL(arch_unregister_cpu); 58EXPORT_SYMBOL(arch_unregister_cpu);
59#else 59#else
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d1bc04006d16..8106bba41ecb 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -46,6 +46,7 @@
46#include <asm/pgalloc.h> 46#include <asm/pgalloc.h>
47#include <asm/sections.h> 47#include <asm/sections.h>
48#include <asm/paravirt.h> 48#include <asm/paravirt.h>
49#include <asm/setup.h>
49 50
50unsigned int __VMALLOC_RESERVE = 128 << 20; 51unsigned int __VMALLOC_RESERVE = 128 << 20;
51 52
@@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
328 329
329void __init native_pagetable_setup_start(pgd_t *base) 330void __init native_pagetable_setup_start(pgd_t *base)
330{ 331{
331#ifdef CONFIG_X86_PAE 332 unsigned long pfn, va;
332 int i; 333 pgd_t *pgd;
334 pud_t *pud;
335 pmd_t *pmd;
336 pte_t *pte;
333 337
334 /* 338 /*
335 * Init entries of the first-level page table to the 339 * Remove any mappings which extend past the end of physical
336 * zero page, if they haven't already been set up. 340 * memory from the boot time page table:
337 *
338 * In a normal native boot, we'll be running on a
339 * pagetable rooted in swapper_pg_dir, but not in PAE
340 * mode, so this will end up clobbering the mappings
341 * for the lower 24Mbytes of the address space,
342 * without affecting the kernel address space.
343 */ 341 */
344 for (i = 0; i < USER_PTRS_PER_PGD; i++) 342 for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
345 set_pgd(&base[i], 343 va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
346 __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); 344 pgd = base + pgd_index(va);
347 345 if (!pgd_present(*pgd))
348 /* Make sure kernel address space is empty so that a pagetable 346 break;
349 will be allocated for it. */ 347
350 memset(&base[USER_PTRS_PER_PGD], 0, 348 pud = pud_offset(pgd, va);
351 KERNEL_PGD_PTRS * sizeof(pgd_t)); 349 pmd = pmd_offset(pud, va);
352#else 350 if (!pmd_present(*pmd))
351 break;
352
353 pte = pte_offset_kernel(pmd, va);
354 if (!pte_present(*pte))
355 break;
356
357 pte_clear(NULL, va, pte);
358 }
353 paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); 359 paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
354#endif
355} 360}
356 361
357void __init native_pagetable_setup_done(pgd_t *base) 362void __init native_pagetable_setup_done(pgd_t *base)
358{ 363{
359#ifdef CONFIG_X86_PAE
360 /*
361 * Add low memory identity-mappings - SMP needs it when
362 * starting up on an AP from real-mode. In the non-PAE
363 * case we already have these mappings through head.S.
364 * All user-space mappings are explicitly cleared after
365 * SMP startup.
366 */
367 set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
368#endif
369} 364}
370 365
371/* 366/*
@@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
374 * the boot process. 369 * the boot process.
375 * 370 *
376 * If we're booting on native hardware, this will be a pagetable 371 * If we're booting on native hardware, this will be a pagetable
377 * constructed in arch/i386/kernel/head.S, and not running in PAE mode 372 * constructed in arch/x86/kernel/head_32.S. The root of the
378 * (even if we'll end up running in PAE). The root of the pagetable 373 * pagetable will be swapper_pg_dir.
379 * will be swapper_pg_dir.
380 * 374 *
381 * If we're booting paravirtualized under a hypervisor, then there are 375 * If we're booting paravirtualized under a hypervisor, then there are
382 * more options: we may already be running PAE, and the pagetable may 376 * more options: we may already be running PAE, and the pagetable may
@@ -537,14 +531,6 @@ void __init paging_init(void)
537 531
538 load_cr3(swapper_pg_dir); 532 load_cr3(swapper_pg_dir);
539 533
540#ifdef CONFIG_X86_PAE
541 /*
542 * We will bail out later - printk doesn't work right now so
543 * the user would just see a hanging kernel.
544 */
545 if (cpu_has_pae)
546 set_in_cr4(X86_CR4_PAE);
547#endif
548 __flush_tlb_all(); 534 __flush_tlb_all();
549 535
550 kmap_init(); 536 kmap_init();
@@ -675,13 +661,11 @@ void __init mem_init(void)
675 BUG_ON((unsigned long)high_memory > VMALLOC_START); 661 BUG_ON((unsigned long)high_memory > VMALLOC_START);
676#endif /* double-sanity-check paranoia */ 662#endif /* double-sanity-check paranoia */
677 663
678#ifdef CONFIG_X86_PAE
679 if (!cpu_has_pae)
680 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
681#endif
682 if (boot_cpu_data.wp_works_ok < 0) 664 if (boot_cpu_data.wp_works_ok < 0)
683 test_wp_bit(); 665 test_wp_bit();
684 666
667 cpa_init();
668
685 /* 669 /*
686 * Subtle. SMP is doing it's boot stuff late (because it has to 670 * Subtle. SMP is doing it's boot stuff late (because it has to
687 * fork idle threads) - but it also needs low mappings for the 671 * fork idle threads) - but it also needs low mappings for the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5fe880fc305d..b59fc238151f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -528,13 +528,15 @@ void __init mem_init(void)
528 reservedpages << (PAGE_SHIFT-10), 528 reservedpages << (PAGE_SHIFT-10),
529 datasize >> 10, 529 datasize >> 10,
530 initsize >> 10); 530 initsize >> 10);
531
532 cpa_init();
531} 533}
532 534
533void free_init_pages(char *what, unsigned long begin, unsigned long end) 535void free_init_pages(char *what, unsigned long begin, unsigned long end)
534{ 536{
535 unsigned long addr; 537 unsigned long addr = begin;
536 538
537 if (begin >= end) 539 if (addr >= end)
538 return; 540 return;
539 541
540 /* 542 /*
@@ -549,7 +551,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
549#else 551#else
550 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); 552 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
551 553
552 for (addr = begin; addr < end; addr += PAGE_SIZE) { 554 for (; addr < end; addr += PAGE_SIZE) {
553 ClearPageReserved(virt_to_page(addr)); 555 ClearPageReserved(virt_to_page(addr));
554 init_page_count(virt_to_page(addr)); 556 init_page_count(virt_to_page(addr));
555 memset((void *)(addr & ~(PAGE_SIZE-1)), 557 memset((void *)(addr & ~(PAGE_SIZE-1)),
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ee6648fe6b15..a4897a85268a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str)
260early_param("early_ioremap_debug", early_ioremap_debug_setup); 260early_param("early_ioremap_debug", early_ioremap_debug_setup);
261 261
262static __initdata int after_paging_init; 262static __initdata int after_paging_init;
263static __initdata unsigned long bm_pte[1024] 263static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
264 __attribute__((aligned(PAGE_SIZE))); 264 __attribute__((aligned(PAGE_SIZE)));
265 265
266static inline unsigned long * __init early_ioremap_pgd(unsigned long addr) 266static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
267{ 267{
268 return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023); 268 pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
269 pud_t *pud = pud_offset(pgd, addr);
270 pmd_t *pmd = pmd_offset(pud, addr);
271
272 return pmd;
269} 273}
270 274
271static inline unsigned long * __init early_ioremap_pte(unsigned long addr) 275static inline pte_t * __init early_ioremap_pte(unsigned long addr)
272{ 276{
273 return bm_pte + ((addr >> PAGE_SHIFT) & 1023); 277 return &bm_pte[pte_index(addr)];
274} 278}
275 279
276void __init early_ioremap_init(void) 280void __init early_ioremap_init(void)
277{ 281{
278 unsigned long *pgd; 282 pmd_t *pmd;
279 283
280 if (early_ioremap_debug) 284 if (early_ioremap_debug)
281 printk(KERN_INFO "early_ioremap_init()\n"); 285 printk(KERN_INFO "early_ioremap_init()\n");
282 286
283 pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); 287 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
284 *pgd = __pa(bm_pte) | _PAGE_TABLE;
285 memset(bm_pte, 0, sizeof(bm_pte)); 288 memset(bm_pte, 0, sizeof(bm_pte));
289 pmd_populate_kernel(&init_mm, pmd, bm_pte);
290
286 /* 291 /*
287 * The boot-ioremap range spans multiple pgds, for which 292 * The boot-ioremap range spans multiple pmds, for which
288 * we are not prepared: 293 * we are not prepared:
289 */ 294 */
290 if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) { 295 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
291 WARN_ON(1); 296 WARN_ON(1);
292 printk(KERN_WARNING "pgd %p != %p\n", 297 printk(KERN_WARNING "pmd %p != %p\n",
293 pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))); 298 pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
294 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", 299 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
295 fix_to_virt(FIX_BTMAP_BEGIN)); 300 fix_to_virt(FIX_BTMAP_BEGIN));
296 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n", 301 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
297 fix_to_virt(FIX_BTMAP_END)); 302 fix_to_virt(FIX_BTMAP_END));
298 303
299 printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END); 304 printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
300 printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n", 305 printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
@@ -304,28 +309,29 @@ void __init early_ioremap_init(void)
304 309
305void __init early_ioremap_clear(void) 310void __init early_ioremap_clear(void)
306{ 311{
307 unsigned long *pgd; 312 pmd_t *pmd;
308 313
309 if (early_ioremap_debug) 314 if (early_ioremap_debug)
310 printk(KERN_INFO "early_ioremap_clear()\n"); 315 printk(KERN_INFO "early_ioremap_clear()\n");
311 316
312 pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); 317 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
313 *pgd = 0; 318 pmd_clear(pmd);
314 paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT); 319 paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
315 __flush_tlb_all(); 320 __flush_tlb_all();
316} 321}
317 322
318void __init early_ioremap_reset(void) 323void __init early_ioremap_reset(void)
319{ 324{
320 enum fixed_addresses idx; 325 enum fixed_addresses idx;
321 unsigned long *pte, phys, addr; 326 unsigned long addr, phys;
327 pte_t *pte;
322 328
323 after_paging_init = 1; 329 after_paging_init = 1;
324 for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { 330 for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
325 addr = fix_to_virt(idx); 331 addr = fix_to_virt(idx);
326 pte = early_ioremap_pte(addr); 332 pte = early_ioremap_pte(addr);
327 if (*pte & _PAGE_PRESENT) { 333 if (pte_present(*pte)) {
328 phys = *pte & PAGE_MASK; 334 phys = pte_val(*pte) & PAGE_MASK;
329 set_fixmap(idx, phys); 335 set_fixmap(idx, phys);
330 } 336 }
331 } 337 }
@@ -334,7 +340,8 @@ void __init early_ioremap_reset(void)
334static void __init __early_set_fixmap(enum fixed_addresses idx, 340static void __init __early_set_fixmap(enum fixed_addresses idx,
335 unsigned long phys, pgprot_t flags) 341 unsigned long phys, pgprot_t flags)
336{ 342{
337 unsigned long *pte, addr = __fix_to_virt(idx); 343 unsigned long addr = __fix_to_virt(idx);
344 pte_t *pte;
338 345
339 if (idx >= __end_of_fixed_addresses) { 346 if (idx >= __end_of_fixed_addresses) {
340 BUG(); 347 BUG();
@@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
342 } 349 }
343 pte = early_ioremap_pte(addr); 350 pte = early_ioremap_pte(addr);
344 if (pgprot_val(flags)) 351 if (pgprot_val(flags))
345 *pte = (phys & PAGE_MASK) | pgprot_val(flags); 352 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
346 else 353 else
347 *pte = 0; 354 pte_clear(NULL, addr, pte);
348 __flush_tlb_one(addr); 355 __flush_tlb_one(addr);
349} 356}
350 357
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8493c855582b..440210a2277d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -8,6 +8,7 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/interrupt.h>
11 12
12#include <asm/e820.h> 13#include <asm/e820.h>
13#include <asm/processor.h> 14#include <asm/processor.h>
@@ -191,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
191 * or when the present bit is not set. Otherwise we would return a 192 * or when the present bit is not set. Otherwise we would return a
192 * pointer to a nonexisting mapping. 193 * pointer to a nonexisting mapping.
193 */ 194 */
194pte_t *lookup_address(unsigned long address, int *level) 195pte_t *lookup_address(unsigned long address, unsigned int *level)
195{ 196{
196 pgd_t *pgd = pgd_offset_k(address); 197 pgd_t *pgd = pgd_offset_k(address);
197 pud_t *pud; 198 pud_t *pud;
@@ -252,10 +253,11 @@ static int
252try_preserve_large_page(pte_t *kpte, unsigned long address, 253try_preserve_large_page(pte_t *kpte, unsigned long address,
253 struct cpa_data *cpa) 254 struct cpa_data *cpa)
254{ 255{
255 unsigned long nextpage_addr, numpages, pmask, psize, flags; 256 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
256 pte_t new_pte, old_pte, *tmp; 257 pte_t new_pte, old_pte, *tmp;
257 pgprot_t old_prot, new_prot; 258 pgprot_t old_prot, new_prot;
258 int level, do_split = 1; 259 int i, do_split = 1;
260 unsigned int level;
259 261
260 spin_lock_irqsave(&pgd_lock, flags); 262 spin_lock_irqsave(&pgd_lock, flags);
261 /* 263 /*
@@ -302,6 +304,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
302 new_prot = static_protections(new_prot, address); 304 new_prot = static_protections(new_prot, address);
303 305
304 /* 306 /*
307 * We need to check the full range, whether
308 * static_protection() requires a different pgprot for one of
309 * the pages in the range we try to preserve:
310 */
311 addr = address + PAGE_SIZE;
312 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
313 pgprot_t chk_prot = static_protections(new_prot, addr);
314
315 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
316 goto out_unlock;
317 }
318
319 /*
305 * If there are no changes, return. maxpages has been updated 320 * If there are no changes, return. maxpages has been updated
306 * above: 321 * above:
307 */ 322 */
@@ -335,23 +350,103 @@ out_unlock:
335 return do_split; 350 return do_split;
336} 351}
337 352
353static LIST_HEAD(page_pool);
354static unsigned long pool_size, pool_pages, pool_low;
355static unsigned long pool_used, pool_failed, pool_refill;
356
357static void cpa_fill_pool(void)
358{
359 struct page *p;
360 gfp_t gfp = GFP_KERNEL;
361
362 /* Do not allocate from interrupt context */
363 if (in_irq() || irqs_disabled())
364 return;
365 /*
366 * Check unlocked. I does not matter when we have one more
367 * page in the pool. The bit lock avoids recursive pool
368 * allocations:
369 */
370 if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
371 return;
372
373#ifdef CONFIG_DEBUG_PAGEALLOC
374 /*
375 * We could do:
376 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
377 * but this fails on !PREEMPT kernels
378 */
379 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
380#endif
381
382 while (pool_pages < pool_size) {
383 p = alloc_pages(gfp, 0);
384 if (!p) {
385 pool_failed++;
386 break;
387 }
388 spin_lock_irq(&pgd_lock);
389 list_add(&p->lru, &page_pool);
390 pool_pages++;
391 spin_unlock_irq(&pgd_lock);
392 }
393 clear_bit_unlock(0, &pool_refill);
394}
395
396#define SHIFT_MB (20 - PAGE_SHIFT)
397#define ROUND_MB_GB ((1 << 10) - 1)
398#define SHIFT_MB_GB 10
399#define POOL_PAGES_PER_GB 16
400
401void __init cpa_init(void)
402{
403 struct sysinfo si;
404 unsigned long gb;
405
406 si_meminfo(&si);
407 /*
408 * Calculate the number of pool pages:
409 *
410 * Convert totalram (nr of pages) to MiB and round to the next
411 * GiB. Shift MiB to Gib and multiply the result by
412 * POOL_PAGES_PER_GB:
413 */
414 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
415 pool_size = POOL_PAGES_PER_GB * gb;
416 pool_low = pool_size;
417
418 cpa_fill_pool();
419 printk(KERN_DEBUG
420 "CPA: page pool initialized %lu of %lu pages preallocated\n",
421 pool_pages, pool_size);
422}
423
338static int split_large_page(pte_t *kpte, unsigned long address) 424static int split_large_page(pte_t *kpte, unsigned long address)
339{ 425{
340 unsigned long flags, pfn, pfninc = 1; 426 unsigned long flags, pfn, pfninc = 1;
341 gfp_t gfp_flags = GFP_KERNEL;
342 unsigned int i, level; 427 unsigned int i, level;
343 pte_t *pbase, *tmp; 428 pte_t *pbase, *tmp;
344 pgprot_t ref_prot; 429 pgprot_t ref_prot;
345 struct page *base; 430 struct page *base;
346 431
347#ifdef CONFIG_DEBUG_PAGEALLOC 432 /*
348 gfp_flags = GFP_ATOMIC | __GFP_NOWARN; 433 * Get a page from the pool. The pool list is protected by the
349#endif 434 * pgd_lock, which we have to take anyway for the split
350 base = alloc_pages(gfp_flags, 0); 435 * operation:
351 if (!base) 436 */
437 spin_lock_irqsave(&pgd_lock, flags);
438 if (list_empty(&page_pool)) {
439 spin_unlock_irqrestore(&pgd_lock, flags);
352 return -ENOMEM; 440 return -ENOMEM;
441 }
442
443 base = list_first_entry(&page_pool, struct page, lru);
444 list_del(&base->lru);
445 pool_pages--;
446
447 if (pool_pages < pool_low)
448 pool_low = pool_pages;
353 449
354 spin_lock_irqsave(&pgd_lock, flags);
355 /* 450 /*
356 * Check for races, another CPU might have split this page 451 * Check for races, another CPU might have split this page
357 * up for us already: 452 * up for us already:
@@ -396,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
396 base = NULL; 491 base = NULL;
397 492
398out_unlock: 493out_unlock:
494 /*
495 * If we dropped out via the lookup_address check under
496 * pgd_lock then stick the page back into the pool:
497 */
498 if (base) {
499 list_add(&base->lru, &page_pool);
500 pool_pages++;
501 } else
502 pool_used++;
399 spin_unlock_irqrestore(&pgd_lock, flags); 503 spin_unlock_irqrestore(&pgd_lock, flags);
400 504
401 if (base)
402 __free_pages(base, 0);
403
404 return 0; 505 return 0;
405} 506}
406 507
407static int __change_page_attr(unsigned long address, struct cpa_data *cpa) 508static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
408{ 509{
409 int level, do_split, err; 510 int do_split, err;
511 unsigned int level;
410 struct page *kpte_page; 512 struct page *kpte_page;
411 pte_t *kpte; 513 pte_t *kpte;
412 514
@@ -598,7 +700,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
598 * Check whether we really changed something: 700 * Check whether we really changed something:
599 */ 701 */
600 if (!cpa.flushtlb) 702 if (!cpa.flushtlb)
601 return ret; 703 goto out;
602 704
603 /* 705 /*
604 * No need to flush, when we did not set any of the caching 706 * No need to flush, when we did not set any of the caching
@@ -617,6 +719,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
617 else 719 else
618 cpa_flush_all(cache); 720 cpa_flush_all(cache);
619 721
722out:
723 cpa_fill_pool();
620 return ret; 724 return ret;
621} 725}
622 726
@@ -770,6 +874,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
770 * but that can deadlock->flush only current cpu: 874 * but that can deadlock->flush only current cpu:
771 */ 875 */
772 __flush_tlb_all(); 876 __flush_tlb_all();
877
878 /*
879 * Try to refill the page pool here. We can do this only after
880 * the tlb flush.
881 */
882 cpa_fill_pool();
773} 883}
774#endif 884#endif
775 885
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index d764ec950065..9ff4d5b55ad1 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,2 +1,2 @@
1obj-$(CONFIG_PM) += cpu.o 1obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o
2obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o 2obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu_32.c
index efcf620d1439..7f9c6da04a4c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu_32.c
@@ -40,7 +40,7 @@ static void __save_processor_state(struct saved_context *ctxt)
40 savesegment(ss, ctxt->ss); 40 savesegment(ss, ctxt->ss);
41 41
42 /* 42 /*
43 * control registers 43 * control registers
44 */ 44 */
45 ctxt->cr0 = read_cr0(); 45 ctxt->cr0 = read_cr0();
46 ctxt->cr2 = read_cr2(); 46 ctxt->cr2 = read_cr2();
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/power/cpu_64.c
index 7ac7130022f1..66bdfb591fd8 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -1,8 +1,9 @@
1/* 1/*
2 * Suspend support specific for i386. 2 * Suspend and hibernation support for x86-64
3 * 3 *
4 * Distribute under GPLv2 4 * Distribute under GPLv2
5 * 5 *
6 * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
6 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> 7 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
7 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> 8 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
8 */ 9 */
@@ -14,9 +15,6 @@
14#include <asm/pgtable.h> 15#include <asm/pgtable.h>
15#include <asm/mtrr.h> 16#include <asm/mtrr.h>
16 17
17/* References to section boundaries */
18extern const void __nosave_begin, __nosave_end;
19
20static void fix_processor_context(void); 18static void fix_processor_context(void);
21 19
22struct saved_context saved_context; 20struct saved_context saved_context;
@@ -63,7 +61,7 @@ static void __save_processor_state(struct saved_context *ctxt)
63 mtrr_save_fixed_ranges(NULL); 61 mtrr_save_fixed_ranges(NULL);
64 62
65 /* 63 /*
66 * control registers 64 * control registers
67 */ 65 */
68 rdmsrl(MSR_EFER, ctxt->efer); 66 rdmsrl(MSR_EFER, ctxt->efer);
69 ctxt->cr0 = read_cr0(); 67 ctxt->cr0 = read_cr0();
@@ -166,155 +164,3 @@ static void fix_processor_context(void)
166 loaddebug(&current->thread, 7); 164 loaddebug(&current->thread, 7);
167 } 165 }
168} 166}
169
170#ifdef CONFIG_HIBERNATION
171/* Defined in arch/x86_64/kernel/suspend_asm.S */
172extern int restore_image(void);
173
174/*
175 * Address to jump to in the last phase of restore in order to get to the image
176 * kernel's text (this value is passed in the image header).
177 */
178unsigned long restore_jump_address;
179
180/*
181 * Value of the cr3 register from before the hibernation (this value is passed
182 * in the image header).
183 */
184unsigned long restore_cr3;
185
186pgd_t *temp_level4_pgt;
187
188void *relocated_restore_code;
189
190static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
191{
192 long i, j;
193
194 i = pud_index(address);
195 pud = pud + i;
196 for (; i < PTRS_PER_PUD; pud++, i++) {
197 unsigned long paddr;
198 pmd_t *pmd;
199
200 paddr = address + i*PUD_SIZE;
201 if (paddr >= end)
202 break;
203
204 pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
205 if (!pmd)
206 return -ENOMEM;
207 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
208 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
209 unsigned long pe;
210
211 if (paddr >= end)
212 break;
213 pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
214 pe &= __supported_pte_mask;
215 set_pmd(pmd, __pmd(pe));
216 }
217 }
218 return 0;
219}
220
221static int set_up_temporary_mappings(void)
222{
223 unsigned long start, end, next;
224 int error;
225
226 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
227 if (!temp_level4_pgt)
228 return -ENOMEM;
229
230 /* It is safe to reuse the original kernel mapping */
231 set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
232 init_level4_pgt[pgd_index(__START_KERNEL_map)]);
233
234 /* Set up the direct mapping from scratch */
235 start = (unsigned long)pfn_to_kaddr(0);
236 end = (unsigned long)pfn_to_kaddr(end_pfn);
237
238 for (; start < end; start = next) {
239 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
240 if (!pud)
241 return -ENOMEM;
242 next = start + PGDIR_SIZE;
243 if (next > end)
244 next = end;
245 if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
246 return error;
247 set_pgd(temp_level4_pgt + pgd_index(start),
248 mk_kernel_pgd(__pa(pud)));
249 }
250 return 0;
251}
252
253int swsusp_arch_resume(void)
254{
255 int error;
256
257 /* We have got enough memory and from now on we cannot recover */
258 if ((error = set_up_temporary_mappings()))
259 return error;
260
261 relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
262 if (!relocated_restore_code)
263 return -ENOMEM;
264 memcpy(relocated_restore_code, &core_restore_code,
265 &restore_registers - &core_restore_code);
266
267 restore_image();
268 return 0;
269}
270
271/*
272 * pfn_is_nosave - check if given pfn is in the 'nosave' section
273 */
274
275int pfn_is_nosave(unsigned long pfn)
276{
277 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
278 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
279 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
280}
281
282struct restore_data_record {
283 unsigned long jump_address;
284 unsigned long cr3;
285 unsigned long magic;
286};
287
288#define RESTORE_MAGIC 0x0123456789ABCDEFUL
289
290/**
291 * arch_hibernation_header_save - populate the architecture specific part
292 * of a hibernation image header
293 * @addr: address to save the data at
294 */
295int arch_hibernation_header_save(void *addr, unsigned int max_size)
296{
297 struct restore_data_record *rdr = addr;
298
299 if (max_size < sizeof(struct restore_data_record))
300 return -EOVERFLOW;
301 rdr->jump_address = restore_jump_address;
302 rdr->cr3 = restore_cr3;
303 rdr->magic = RESTORE_MAGIC;
304 return 0;
305}
306
307/**
308 * arch_hibernation_header_restore - read the architecture specific data
309 * from the hibernation image header
310 * @addr: address to read the data from
311 */
312int arch_hibernation_header_restore(void *addr)
313{
314 struct restore_data_record *rdr = addr;
315
316 restore_jump_address = rdr->jump_address;
317 restore_cr3 = rdr->cr3;
318 return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
319}
320#endif /* CONFIG_HIBERNATION */
diff --git a/arch/x86/power/suspend.c b/arch/x86/power/hibernate_32.c
index a0020b913f31..f2b6e3f11bfc 100644
--- a/arch/x86/power/suspend.c
+++ b/arch/x86/power/hibernate_32.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Suspend support specific for i386 - temporary page tables 2 * Hibernation support specific for i386 - temporary page tables
3 * 3 *
4 * Distribute under GPLv2 4 * Distribute under GPLv2
5 * 5 *
@@ -13,7 +13,7 @@
13#include <asm/page.h> 13#include <asm/page.h>
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15 15
16/* Defined in arch/i386/power/swsusp.S */ 16/* Defined in hibernate_asm_32.S */
17extern int restore_image(void); 17extern int restore_image(void);
18 18
19/* References to section boundaries */ 19/* References to section boundaries */
@@ -23,7 +23,7 @@ extern const void __nosave_begin, __nosave_end;
23pgd_t *resume_pg_dir; 23pgd_t *resume_pg_dir;
24 24
25/* The following three functions are based on the analogous code in 25/* The following three functions are based on the analogous code in
26 * arch/i386/mm/init.c 26 * arch/x86/mm/init_32.c
27 */ 27 */
28 28
29/* 29/*
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
new file mode 100644
index 000000000000..b542355e0e34
--- /dev/null
+++ b/arch/x86/power/hibernate_64.c
@@ -0,0 +1,169 @@
1/*
2 * Hibernation support for x86-64
3 *
4 * Distribute under GPLv2
5 *
6 * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
7 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
8 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
9 */
10
11#include <linux/smp.h>
12#include <linux/suspend.h>
13#include <asm/proto.h>
14#include <asm/page.h>
15#include <asm/pgtable.h>
16#include <asm/mtrr.h>
17
18/* References to section boundaries */
19extern const void __nosave_begin, __nosave_end;
20
21/* Defined in hibernate_asm_64.S */
22extern int restore_image(void);
23
24/*
25 * Address to jump to in the last phase of restore in order to get to the image
26 * kernel's text (this value is passed in the image header).
27 */
28unsigned long restore_jump_address;
29
30/*
31 * Value of the cr3 register from before the hibernation (this value is passed
32 * in the image header).
33 */
34unsigned long restore_cr3;
35
36pgd_t *temp_level4_pgt;
37
38void *relocated_restore_code;
39
40static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
41{
42 long i, j;
43
44 i = pud_index(address);
45 pud = pud + i;
46 for (; i < PTRS_PER_PUD; pud++, i++) {
47 unsigned long paddr;
48 pmd_t *pmd;
49
50 paddr = address + i*PUD_SIZE;
51 if (paddr >= end)
52 break;
53
54 pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
55 if (!pmd)
56 return -ENOMEM;
57 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
58 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
59 unsigned long pe;
60
61 if (paddr >= end)
62 break;
63 pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
64 pe &= __supported_pte_mask;
65 set_pmd(pmd, __pmd(pe));
66 }
67 }
68 return 0;
69}
70
71static int set_up_temporary_mappings(void)
72{
73 unsigned long start, end, next;
74 int error;
75
76 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
77 if (!temp_level4_pgt)
78 return -ENOMEM;
79
80 /* It is safe to reuse the original kernel mapping */
81 set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
82 init_level4_pgt[pgd_index(__START_KERNEL_map)]);
83
84 /* Set up the direct mapping from scratch */
85 start = (unsigned long)pfn_to_kaddr(0);
86 end = (unsigned long)pfn_to_kaddr(end_pfn);
87
88 for (; start < end; start = next) {
89 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
90 if (!pud)
91 return -ENOMEM;
92 next = start + PGDIR_SIZE;
93 if (next > end)
94 next = end;
95 if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
96 return error;
97 set_pgd(temp_level4_pgt + pgd_index(start),
98 mk_kernel_pgd(__pa(pud)));
99 }
100 return 0;
101}
102
103int swsusp_arch_resume(void)
104{
105 int error;
106
107 /* We have got enough memory and from now on we cannot recover */
108 if ((error = set_up_temporary_mappings()))
109 return error;
110
111 relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
112 if (!relocated_restore_code)
113 return -ENOMEM;
114 memcpy(relocated_restore_code, &core_restore_code,
115 &restore_registers - &core_restore_code);
116
117 restore_image();
118 return 0;
119}
120
121/*
122 * pfn_is_nosave - check if given pfn is in the 'nosave' section
123 */
124
125int pfn_is_nosave(unsigned long pfn)
126{
127 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
128 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
129 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
130}
131
132struct restore_data_record {
133 unsigned long jump_address;
134 unsigned long cr3;
135 unsigned long magic;
136};
137
138#define RESTORE_MAGIC 0x0123456789ABCDEFUL
139
140/**
141 * arch_hibernation_header_save - populate the architecture specific part
142 * of a hibernation image header
143 * @addr: address to save the data at
144 */
145int arch_hibernation_header_save(void *addr, unsigned int max_size)
146{
147 struct restore_data_record *rdr = addr;
148
149 if (max_size < sizeof(struct restore_data_record))
150 return -EOVERFLOW;
151 rdr->jump_address = restore_jump_address;
152 rdr->cr3 = restore_cr3;
153 rdr->magic = RESTORE_MAGIC;
154 return 0;
155}
156
157/**
158 * arch_hibernation_header_restore - read the architecture specific data
159 * from the hibernation image header
160 * @addr: address to read the data from
161 */
162int arch_hibernation_header_restore(void *addr)
163{
164 struct restore_data_record *rdr = addr;
165
166 restore_jump_address = rdr->jump_address;
167 restore_cr3 = rdr->cr3;
168 return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
169}
diff --git a/arch/x86/power/swsusp.S b/arch/x86/power/hibernate_asm_32.S
index 53662e05b393..b95aa6cfe3cb 100644
--- a/arch/x86/power/swsusp.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -1,7 +1,6 @@
1.text 1.text
2 2
3/* Originally gcc generated, modified by hand 3/*
4 *
5 * This may not use any stack, nor any variable that is not "NoSave": 4 * This may not use any stack, nor any variable that is not "NoSave":
6 * 5 *
7 * Its rewriting one kernel image with another. What is stack in "old" 6 * Its rewriting one kernel image with another. What is stack in "old"
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index aeb9a4d7681e..1deb3244b99b 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -1,7 +1,12 @@
1/* Copyright 2004,2005 Pavel Machek <pavel@suse.cz>, Andi Kleen <ak@suse.de>, Rafael J. Wysocki <rjw@sisk.pl> 1/*
2 * Hibernation support for x86-64
2 * 3 *
3 * Distribute under GPLv2. 4 * Distribute under GPLv2.
4 * 5 *
6 * Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
7 * Copyright 2005 Andi Kleen <ak@suse.de>
8 * Copyright 2004 Pavel Machek <pavel@suse.cz>
9 *
5 * swsusp_arch_resume must not use any stack or any nonlocal variables while 10 * swsusp_arch_resume must not use any stack or any nonlocal variables while
6 * copying pages: 11 * copying pages:
7 * 12 *
@@ -9,7 +14,7 @@
9 * image could very well be data page in "new" image, and overwriting 14 * image could very well be data page in "new" image, and overwriting
10 * your own stack under you is bad idea. 15 * your own stack under you is bad idea.
11 */ 16 */
12 17
13 .text 18 .text
14#include <linux/linkage.h> 19#include <linux/linkage.h>
15#include <asm/segment.h> 20#include <asm/segment.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 45aa771e73a9..0144395448ae 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,7 +58,7 @@
58 58
59xmaddr_t arbitrary_virt_to_machine(unsigned long address) 59xmaddr_t arbitrary_virt_to_machine(unsigned long address)
60{ 60{
61 int level; 61 unsigned int level;
62 pte_t *pte = lookup_address(address, &level); 62 pte_t *pte = lookup_address(address, &level);
63 unsigned offset = address & PAGE_MASK; 63 unsigned offset = address & PAGE_MASK;
64 64
@@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr)
71{ 71{
72 pte_t *pte, ptev; 72 pte_t *pte, ptev;
73 unsigned long address = (unsigned long)vaddr; 73 unsigned long address = (unsigned long)vaddr;
74 int level; 74 unsigned int level;
75 75
76 pte = lookup_address(address, &level); 76 pte = lookup_address(address, &level);
77 BUG_ON(pte == NULL); 77 BUG_ON(pte == NULL);
@@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr)
86{ 86{
87 pte_t *pte, ptev; 87 pte_t *pte, ptev;
88 unsigned long address = (unsigned long)vaddr; 88 unsigned long address = (unsigned long)vaddr;
89 int level; 89 unsigned int level;
90 90
91 pte = lookup_address(address, &level); 91 pte = lookup_address(address, &level);
92 BUG_ON(pte == NULL); 92 BUG_ON(pte == NULL);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b3721fd6877b..c39e1a5aa241 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -217,17 +217,17 @@ unsigned long long xen_sched_clock(void)
217/* Get the CPU speed from Xen */ 217/* Get the CPU speed from Xen */
218unsigned long xen_cpu_khz(void) 218unsigned long xen_cpu_khz(void)
219{ 219{
220 u64 cpu_khz = 1000000ULL << 32; 220 u64 xen_khz = 1000000ULL << 32;
221 const struct vcpu_time_info *info = 221 const struct vcpu_time_info *info =
222 &HYPERVISOR_shared_info->vcpu_info[0].time; 222 &HYPERVISOR_shared_info->vcpu_info[0].time;
223 223
224 do_div(cpu_khz, info->tsc_to_system_mul); 224 do_div(xen_khz, info->tsc_to_system_mul);
225 if (info->tsc_shift < 0) 225 if (info->tsc_shift < 0)
226 cpu_khz <<= -info->tsc_shift; 226 xen_khz <<= -info->tsc_shift;
227 else 227 else
228 cpu_khz >>= info->tsc_shift; 228 xen_khz >>= info->tsc_shift;
229 229
230 return cpu_khz; 230 return xen_khz;
231} 231}
232 232
233/* 233/*
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 8b0d4b7d188a..ce3c0a2cbac4 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -31,6 +31,7 @@
31#include <linux/pm.h> 31#include <linux/pm.h>
32#include <linux/device.h> 32#include <linux/device.h>
33#include <linux/proc_fs.h> 33#include <linux/proc_fs.h>
34#include <linux/acpi.h>
34#ifdef CONFIG_X86 35#ifdef CONFIG_X86
35#include <asm/mpspec.h> 36#include <asm/mpspec.h>
36#endif 37#endif
@@ -39,9 +40,6 @@
39 40
40#define _COMPONENT ACPI_BUS_COMPONENT 41#define _COMPONENT ACPI_BUS_COMPONENT
41ACPI_MODULE_NAME("bus"); 42ACPI_MODULE_NAME("bus");
42#ifdef CONFIG_X86
43extern void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger);
44#endif
45 43
46struct acpi_device *acpi_root; 44struct acpi_device *acpi_root;
47struct proc_dir_entry *acpi_root_dir; 45struct proc_dir_entry *acpi_root_dir;
@@ -653,8 +651,6 @@ void __init acpi_early_init(void)
653 651
654#ifdef CONFIG_X86 652#ifdef CONFIG_X86
655 if (!acpi_ioapic) { 653 if (!acpi_ioapic) {
656 extern u8 acpi_sci_flags;
657
658 /* compatible (0) means level (3) */ 654 /* compatible (0) means level (3) */
659 if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) { 655 if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) {
660 acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK; 656 acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK;
@@ -664,7 +660,6 @@ void __init acpi_early_init(void)
664 acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt, 660 acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt,
665 (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2); 661 (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
666 } else { 662 } else {
667 extern int acpi_sci_override_gsi;
668 /* 663 /*
669 * now that acpi_gbl_FADT is initialized, 664 * now that acpi_gbl_FADT is initialized,
670 * update it with result from INT_SRC_OVR parsing 665 * update it with result from INT_SRC_OVR parsing
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 74b4cf2a6c41..275f23c2deb4 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -178,8 +178,8 @@ static void release_pte(pte_t pte)
178 178
179static void check_gpte(struct lg_cpu *cpu, pte_t gpte) 179static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
180{ 180{
181 if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) 181 if ((pte_flags(gpte) & _PAGE_PSE) ||
182 || pte_pfn(gpte) >= cpu->lg->pfn_limit) 182 pte_pfn(gpte) >= cpu->lg->pfn_limit)
183 kill_guest(cpu, "bad page table entry"); 183 kill_guest(cpu, "bad page table entry");
184} 184}
185 185
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 98a9ca266531..7a72d6aa50be 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -89,6 +89,10 @@ extern int acpi_pci_disabled;
89extern int acpi_skip_timer_override; 89extern int acpi_skip_timer_override;
90extern int acpi_use_timer_override; 90extern int acpi_use_timer_override;
91 91
92extern u8 acpi_sci_flags;
93extern int acpi_sci_override_gsi;
94void acpi_pic_sci_set_trigger(unsigned int, u16);
95
92static inline void disable_acpi(void) 96static inline void disable_acpi(void)
93{ 97{
94 acpi_disabled = 1; 98 acpi_disabled = 1;
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 8dd8c5e3cc7f..6a22212b4b20 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -44,6 +44,8 @@ int set_memory_np(unsigned long addr, int numpages);
44 44
45void clflush_cache_range(void *addr, unsigned int size); 45void clflush_cache_range(void *addr, unsigned int size);
46 46
47void cpa_init(void);
48
47#ifdef CONFIG_DEBUG_RODATA 49#ifdef CONFIG_DEBUG_RODATA
48void mark_rodata_ro(void); 50void mark_rodata_ro(void);
49#endif 51#endif
diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index 811fe14f70b2..9e7280092a48 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h
@@ -206,12 +206,17 @@ static inline u16 geode_mfgpt_read(int timer, u16 reg)
206 return inw(base + reg + (timer * 8)); 206 return inw(base + reg + (timer * 8));
207} 207}
208 208
209extern int __init geode_mfgpt_detect(void);
210extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); 209extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable);
211extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable); 210extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable);
212extern int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner); 211extern int geode_mfgpt_alloc_timer(int timer, int domain);
213 212
214#define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) 213#define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1)
215#define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0) 214#define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0)
216 215
216#ifdef CONFIG_GEODE_MFGPT_TIMER
217extern int __init mfgpt_timer_setup(void);
218#else
219static inline int mfgpt_timer_setup(void) { return 0; }
220#endif
221
217#endif 222#endif
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 984998a30741..5f7257fd589b 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -48,7 +48,6 @@ typedef unsigned long pgprotval_t;
48typedef unsigned long phys_addr_t; 48typedef unsigned long phys_addr_t;
49 49
50typedef union { pteval_t pte, pte_low; } pte_t; 50typedef union { pteval_t pte, pte_low; } pte_t;
51typedef pte_t boot_pte_t;
52 51
53#endif /* __ASSEMBLY__ */ 52#endif /* __ASSEMBLY__ */
54#endif /* CONFIG_X86_PAE */ 53#endif /* CONFIG_X86_PAE */
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 44c0a4f1b1eb..174b87738714 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -255,7 +255,7 @@ enum {
255 * NOTE: the return type is pte_t but if the pmd is PSE then we return it 255 * NOTE: the return type is pte_t but if the pmd is PSE then we return it
256 * as a pte too. 256 * as a pte too.
257 */ 257 */
258extern pte_t *lookup_address(unsigned long address, int *level); 258extern pte_t *lookup_address(unsigned long address, unsigned int *level);
259 259
260/* local pte updates need not use xchg for locking */ 260/* local pte updates need not use xchg for locking */
261static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) 261static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 80dd438642f6..a842c7222b1e 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -52,10 +52,6 @@ void paging_init(void);
52#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) 52#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
53#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) 53#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
54 54
55#define TWOLEVEL_PGDIR_SHIFT 22
56#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
57#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
58
59/* Just any arbitrary offset to the start of the vmalloc VM area: the 55/* Just any arbitrary offset to the start of the vmalloc VM area: the
60 * current 8MB value just means that there will be a 8MB "hole" after the 56 * current 8MB value just means that there will be a 8MB "hole" after the
61 * physical memory until the kernel virtual memory starts. That means that 57 * physical memory until the kernel virtual memory starts. That means that
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 0ab3a3232330..974f5b7bb205 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -5,15 +5,6 @@
5/* These definitions are for GCC v4.x. */ 5/* These definitions are for GCC v4.x. */
6#include <linux/compiler-gcc.h> 6#include <linux/compiler-gcc.h>
7 7
8#ifdef CONFIG_FORCED_INLINING
9# undef inline
10# undef __inline__
11# undef __inline
12# define inline inline __attribute__((always_inline))
13# define __inline__ __inline__ __attribute__((always_inline))
14# define __inline __inline __attribute__((always_inline))
15#endif
16
17#define __used __attribute__((__used__)) 8#define __used __attribute__((__used__))
18#define __must_check __attribute__((warn_unused_result)) 9#define __must_check __attribute__((warn_unused_result))
19#define __compiler_offsetof(a,b) __builtin_offsetof(a,b) 10#define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
diff --git a/init/Kconfig b/init/Kconfig
index 455170e1c1e3..824d48cb67bf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -587,7 +587,7 @@ config COMPAT_BRK
587 disabled, and can be overriden runtime by setting 587 disabled, and can be overriden runtime by setting
588 /proc/sys/kernel/randomize_va_space to 2. 588 /proc/sys/kernel/randomize_va_space to 2.
589 589
590 On non-ancient distros (post-2000 ones) Y is usually a safe choice. 590 On non-ancient distros (post-2000 ones) N is usually a safe choice.
591 591
592config BASE_FULL 592config BASE_FULL
593 default y 593 default y
diff --git a/init/main.c b/init/main.c
index c59859b85db0..8b1982082ad8 100644
--- a/init/main.c
+++ b/init/main.c
@@ -558,7 +558,6 @@ asmlinkage void __init start_kernel(void)
558 preempt_disable(); 558 preempt_disable();
559 build_all_zonelists(); 559 build_all_zonelists();
560 page_alloc_init(); 560 page_alloc_init();
561 enable_debug_pagealloc();
562 printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); 561 printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
563 parse_early_param(); 562 parse_early_param();
564 parse_args("Booting kernel", static_command_line, __start___param, 563 parse_args("Booting kernel", static_command_line, __start___param,
@@ -614,6 +613,7 @@ asmlinkage void __init start_kernel(void)
614 vfs_caches_init_early(); 613 vfs_caches_init_early();
615 cpuset_init_early(); 614 cpuset_init_early();
616 mem_init(); 615 mem_init();
616 enable_debug_pagealloc();
617 cpu_hotplug_init(); 617 cpu_hotplug_init();
618 kmem_cache_init(); 618 kmem_cache_init();
619 setup_per_cpu_pageset(); 619 setup_per_cpu_pageset();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ce0bb2600c25..a370fe828a79 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -465,20 +465,6 @@ config FRAME_POINTER
465 some architectures or if you use external debuggers. 465 some architectures or if you use external debuggers.
466 If you don't debug the kernel, you can say N. 466 If you don't debug the kernel, you can say N.
467 467
468config FORCED_INLINING
469 bool "Force gcc to inline functions marked 'inline'"
470 depends on DEBUG_KERNEL
471 default y
472 help
473 This option determines if the kernel forces gcc to inline the functions
474 developers have marked 'inline'. Doing so takes away freedom from gcc to
475 do what it thinks is best, which is desirable for the gcc 3.x series of
476 compilers. The gcc 4.x series have a rewritten inlining algorithm and
477 disabling this option will generate a smaller kernel there. Hopefully
478 this algorithm is so good that allowing gcc4 to make the decision can
479 become the default in the future, until then this option is there to
480 test gcc for this.
481
482config BOOT_PRINTK_DELAY 468config BOOT_PRINTK_DELAY
483 bool "Delay each boot printk message by N milliseconds" 469 bool "Delay each boot printk message by N milliseconds"
484 depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY 470 depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 419993f58c6b..fd987b17bda7 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -26,6 +26,9 @@
26#include <asm/page.h> /* for PAGE_SIZE */ 26#include <asm/page.h> /* for PAGE_SIZE */
27#include <asm/div64.h> 27#include <asm/div64.h>
28 28
29/* Works only for digits and letters, but small and fast */
30#define TOLOWER(x) ((x) | 0x20)
31
29/** 32/**
30 * simple_strtoul - convert a string to an unsigned long 33 * simple_strtoul - convert a string to an unsigned long
31 * @cp: The start of the string 34 * @cp: The start of the string
@@ -41,17 +44,17 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
41 if (*cp == '0') { 44 if (*cp == '0') {
42 base = 8; 45 base = 8;
43 cp++; 46 cp++;
44 if ((toupper(*cp) == 'X') && isxdigit(cp[1])) { 47 if ((TOLOWER(*cp) == 'x') && isxdigit(cp[1])) {
45 cp++; 48 cp++;
46 base = 16; 49 base = 16;
47 } 50 }
48 } 51 }
49 } else if (base == 16) { 52 } else if (base == 16) {
50 if (cp[0] == '0' && toupper(cp[1]) == 'X') 53 if (cp[0] == '0' && TOLOWER(cp[1]) == 'x')
51 cp += 2; 54 cp += 2;
52 } 55 }
53 while (isxdigit(*cp) && 56 while (isxdigit(*cp) &&
54 (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { 57 (value = isdigit(*cp) ? *cp-'0' : TOLOWER(*cp)-'a'+10) < base) {
55 result = result*base + value; 58 result = result*base + value;
56 cp++; 59 cp++;
57 } 60 }
@@ -92,17 +95,17 @@ unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
92 if (*cp == '0') { 95 if (*cp == '0') {
93 base = 8; 96 base = 8;
94 cp++; 97 cp++;
95 if ((toupper(*cp) == 'X') && isxdigit(cp[1])) { 98 if ((TOLOWER(*cp) == 'x') && isxdigit(cp[1])) {
96 cp++; 99 cp++;
97 base = 16; 100 base = 16;
98 } 101 }
99 } 102 }
100 } else if (base == 16) { 103 } else if (base == 16) {
101 if (cp[0] == '0' && toupper(cp[1]) == 'X') 104 if (cp[0] == '0' && TOLOWER(cp[1]) == 'x')
102 cp += 2; 105 cp += 2;
103 } 106 }
104 while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp) 107 while (isxdigit(*cp)
105 ? toupper(*cp) : *cp)-'A'+10) < base) { 108 && (value = isdigit(*cp) ? *cp-'0' : TOLOWER(*cp)-'a'+10) < base) {
106 result = result*base + value; 109 result = result*base + value;
107 cp++; 110 cp++;
108 } 111 }
@@ -360,24 +363,25 @@ static noinline char* put_dec(char *buf, unsigned long long num)
360#define PLUS 4 /* show plus */ 363#define PLUS 4 /* show plus */
361#define SPACE 8 /* space if plus */ 364#define SPACE 8 /* space if plus */
362#define LEFT 16 /* left justified */ 365#define LEFT 16 /* left justified */
363#define SPECIAL 32 /* 0x */ 366#define SMALL 32 /* Must be 32 == 0x20 */
364#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ 367#define SPECIAL 64 /* 0x */
365 368
366static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type) 369static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type)
367{ 370{
368 char sign,tmp[66]; 371 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
369 const char *digits; 372 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
370 /* we are called with base 8, 10 or 16, only, thus don't need "g..." */ 373
371 static const char small_digits[] = "0123456789abcdefx"; /* "ghijklmnopqrstuvwxyz"; */ 374 char tmp[66];
372 static const char large_digits[] = "0123456789ABCDEFX"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ 375 char sign;
376 char locase;
373 int need_pfx = ((type & SPECIAL) && base != 10); 377 int need_pfx = ((type & SPECIAL) && base != 10);
374 int i; 378 int i;
375 379
376 digits = (type & LARGE) ? large_digits : small_digits; 380 /* locase = 0 or 0x20. ORing digits or letters with 'locase'
381 * produces same digits or (maybe lowercased) letters */
382 locase = (type & SMALL);
377 if (type & LEFT) 383 if (type & LEFT)
378 type &= ~ZEROPAD; 384 type &= ~ZEROPAD;
379 if (base < 2 || base > 36)
380 return NULL;
381 sign = 0; 385 sign = 0;
382 if (type & SIGN) { 386 if (type & SIGN) {
383 if ((signed long long) num < 0) { 387 if ((signed long long) num < 0) {
@@ -404,7 +408,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
404 tmp[i++] = '0'; 408 tmp[i++] = '0';
405 /* Generic code, for any base: 409 /* Generic code, for any base:
406 else do { 410 else do {
407 tmp[i++] = digits[do_div(num,base)]; 411 tmp[i++] = (digits[do_div(num,base)] | locase);
408 } while (num != 0); 412 } while (num != 0);
409 */ 413 */
410 else if (base != 10) { /* 8 or 16 */ 414 else if (base != 10) { /* 8 or 16 */
@@ -412,7 +416,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
412 int shift = 3; 416 int shift = 3;
413 if (base == 16) shift = 4; 417 if (base == 16) shift = 4;
414 do { 418 do {
415 tmp[i++] = digits[((unsigned char)num) & mask]; 419 tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
416 num >>= shift; 420 num >>= shift;
417 } while (num); 421 } while (num);
418 } else { /* base 10 */ 422 } else { /* base 10 */
@@ -444,7 +448,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
444 ++buf; 448 ++buf;
445 if (base == 16) { 449 if (base == 16) {
446 if (buf < end) 450 if (buf < end)
447 *buf = digits[16]; /* for arbitrary base: digits[33]; */ 451 *buf = ('X' | locase);
448 ++buf; 452 ++buf;
449 } 453 }
450 } 454 }
@@ -644,6 +648,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
644 continue; 648 continue;
645 649
646 case 'p': 650 case 'p':
651 flags |= SMALL;
647 if (field_width == -1) { 652 if (field_width == -1) {
648 field_width = 2*sizeof(void *); 653 field_width = 2*sizeof(void *);
649 flags |= ZEROPAD; 654 flags |= ZEROPAD;
@@ -680,9 +685,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
680 base = 8; 685 base = 8;
681 break; 686 break;
682 687
683 case 'X':
684 flags |= LARGE;
685 case 'x': 688 case 'x':
689 flags |= SMALL;
690 case 'X':
686 base = 16; 691 base = 16;
687 break; 692 break;
688 693