aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/cnt32_to_63.h78
-rw-r--r--arch/arm/kernel/kgdb.c2
-rw-r--r--arch/arm/mach-davinci/psc.c3
-rw-r--r--arch/arm/mach-pxa/time.c2
-rw-r--r--arch/arm/mach-sa1100/generic.c2
-rw-r--r--arch/arm/mach-sa1100/include/mach/jornada720.h11
-rw-r--r--arch/arm/mach-sa1100/jornada720_ssp.c10
-rw-r--r--arch/arm/mach-versatile/core.c2
-rw-r--r--arch/arm/plat-omap/devices.c13
-rw-r--r--arch/avr32/boards/atstk1000/atstk1002.c2
-rw-r--r--arch/avr32/boot/images/.gitignore4
-rw-r--r--arch/avr32/kernel/.gitignore1
-rw-r--r--arch/avr32/kernel/avr32_ksyms.c1
-rw-r--r--arch/avr32/kernel/syscall-stubs.S9
-rw-r--r--arch/avr32/kernel/syscall_table.S2
-rw-r--r--arch/avr32/kernel/traps.c8
-rw-r--r--arch/avr32/lib/findbit.S30
-rw-r--r--arch/ia64/include/asm/sections.h3
-rw-r--r--arch/ia64/kernel/efi.c5
-rw-r--r--arch/ia64/kernel/head.S9
-rw-r--r--arch/ia64/kernel/setup.c2
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S8
-rw-r--r--arch/ia64/kvm/kvm-ia64.c7
-rw-r--r--arch/ia64/mm/contig.c2
-rw-r--r--arch/ia64/mm/discontig.c2
-rw-r--r--arch/m32r/Kconfig10
-rw-r--r--arch/m32r/kernel/entry.S2
-rw-r--r--arch/m32r/kernel/head.S1
-rw-r--r--arch/m32r/kernel/irq.c6
-rw-r--r--arch/m32r/kernel/m32r_ksyms.c2
-rw-r--r--arch/m32r/kernel/process.c30
-rw-r--r--arch/m32r/kernel/smp.c4
-rw-r--r--arch/m32r/kernel/time.c5
-rw-r--r--arch/m32r/kernel/traps.c8
-rw-r--r--arch/m32r/lib/delay.c2
-rw-r--r--arch/mips/Kconfig53
-rw-r--r--arch/mips/au1000/common/gpio.c6
-rw-r--r--arch/mips/kernel/Makefile1
-rw-r--r--arch/mips/kernel/cevt-r4k.c173
-rw-r--r--arch/mips/kernel/cevt-smtc.c321
-rw-r--r--arch/mips/kernel/cpu-probe.c26
-rw-r--r--arch/mips/kernel/entry.S10
-rw-r--r--arch/mips/kernel/genex.S41
-rw-r--r--arch/mips/kernel/head.S1
-rw-r--r--arch/mips/kernel/kgdb.c3
-rw-r--r--arch/mips/kernel/mips-mt-fpaff.c2
-rw-r--r--arch/mips/kernel/process.c19
-rw-r--r--arch/mips/kernel/ptrace.c2
-rw-r--r--arch/mips/kernel/smtc.c260
-rw-r--r--arch/mips/kernel/traps.c28
-rw-r--r--arch/mips/kernel/vmlinux.lds.S1
-rw-r--r--arch/mips/lib/csum_partial.S21
-rw-r--r--arch/mips/mti-malta/Makefile2
-rw-r--r--arch/mips/mti-malta/malta-smtc.c9
-rw-r--r--arch/mips/pci/Makefile1
-rw-r--r--arch/mips/pci/pci-bcm47xx.c60
-rw-r--r--arch/mips/pci/pci-ip27.c40
-rw-r--r--arch/mips/sibyte/swarm/Makefile3
-rw-r--r--arch/mips/sibyte/swarm/platform.c85
-rw-r--r--arch/mips/vr41xx/common/irq.c6
-rw-r--r--arch/mn10300/kernel/irq.c71
-rw-r--r--arch/mn10300/kernel/time.c52
-rw-r--r--arch/mn10300/unit-asb2303/unit-init.c2
-rw-r--r--arch/mn10300/unit-asb2305/unit-init.c2
-rw-r--r--arch/powerpc/boot/Makefile2
-rw-r--r--arch/powerpc/boot/dts/holly.dts106
-rw-r--r--arch/powerpc/boot/dts/mpc8610_hpcd.dts8
-rw-r--r--arch/powerpc/include/asm/elf.h7
-rw-r--r--arch/powerpc/include/asm/sections.h12
-rw-r--r--arch/powerpc/kernel/idle.c6
-rw-r--r--arch/powerpc/kernel/kgdb.c5
-rw-r--r--arch/powerpc/kernel/module_64.c19
-rw-r--r--arch/powerpc/platforms/fsl_uli1575.c12
-rw-r--r--arch/s390/kernel/time.c2
-rw-r--r--arch/s390/lib/delay.c88
-rw-r--r--arch/sparc64/kernel/irq.c5
-rw-r--r--arch/sparc64/kernel/of_device.c9
-rw-r--r--arch/sparc64/kernel/pci.c2
-rw-r--r--arch/sparc64/kernel/pci_psycho.c8
-rw-r--r--arch/sparc64/kernel/traps.c3
-rw-r--r--arch/x86/Kconfig75
-rw-r--r--arch/x86/Kconfig.cpu18
-rw-r--r--arch/x86/boot/compressed/head_32.S5
-rw-r--r--arch/x86/boot/compressed/misc.c12
-rw-r--r--arch/x86/boot/compressed/relocs.c2
-rw-r--r--arch/x86/boot/header.S1
-rw-r--r--arch/x86/configs/i386_defconfig19
-rw-r--r--arch/x86/configs/x86_64_defconfig29
-rw-r--r--arch/x86/ia32/ia32_aout.c11
-rw-r--r--arch/x86/ia32/ia32_signal.c21
-rw-r--r--arch/x86/ia32/sys_ia32.c9
-rw-r--r--arch/x86/kernel/acpi/boot.c21
-rw-r--r--arch/x86/kernel/alternative.c8
-rw-r--r--arch/x86/kernel/amd_iommu.c22
-rw-r--r--arch/x86/kernel/aperture_64.c6
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/bios_uv.c10
-rw-r--r--arch/x86/kernel/cpu/common.c24
-rw-r--r--arch/x86/kernel/cpu/common_64.c51
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c7
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c276
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c86
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/crash_dump_64.c13
-rw-r--r--arch/x86/kernel/ds.c954
-rw-r--r--arch/x86/kernel/efi.c6
-rw-r--r--arch/x86/kernel/entry_64.S4
-rw-r--r--arch/x86/kernel/head64.c5
-rw-r--r--arch/x86/kernel/head_32.S34
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/ioport.c1
-rw-r--r--arch/x86/kernel/ipi.c3
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kgdb.c50
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/nmi.c11
-rw-r--r--arch/x86/kernel/olpc.c6
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c2
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/pci-gart_64.c20
-rw-r--r--arch/x86/kernel/pcspeaker.c13
-rw-r--r--arch/x86/kernel/process.c20
-rw-r--r--arch/x86/kernel/process_32.c64
-rw-r--r--arch/x86/kernel/process_64.c172
-rw-r--r--arch/x86/kernel/ptrace.c480
-rw-r--r--arch/x86/kernel/reboot.c6
-rw-r--r--arch/x86/kernel/setup.c21
-rw-r--r--arch/x86/kernel/setup_percpu.c9
-rw-r--r--arch/x86/kernel/sigframe.h5
-rw-r--r--arch/x86/kernel/signal_32.c12
-rw-r--r--arch/x86/kernel/signal_64.c112
-rw-r--r--arch/x86/kernel/smpboot.c9
-rw-r--r--arch/x86/kernel/sys_i386_32.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c44
-rw-r--r--arch/x86/kernel/syscall_64.c4
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/tls.c1
-rw-r--r--arch/x86/kernel/traps_64.c66
-rw-r--r--arch/x86/kernel/tsc.c290
-rw-r--r--arch/x86/kernel/visws_quirks.c16
-rw-r--r--arch/x86/kernel/vm86_32.c1
-rw-r--r--arch/x86/kernel/vmi_32.c12
-rw-r--r--arch/x86/kernel/vsmp_64.c2
-rw-r--r--arch/x86/lib/msr-on-cpu.c78
-rw-r--r--arch/x86/lib/string_32.c42
-rw-r--r--arch/x86/lib/strstr_32.c6
-rw-r--r--arch/x86/mach-default/setup.c4
-rw-r--r--arch/x86/mm/discontig_32.c2
-rw-r--r--arch/x86/mm/dump_pagetables.c4
-rw-r--r--arch/x86/mm/fault.c3
-rw-r--r--arch/x86/mm/init_32.c89
-rw-r--r--arch/x86/mm/init_64.c118
-rw-r--r--arch/x86/mm/ioremap.c23
-rw-r--r--arch/x86/mm/numa_64.c10
-rw-r--r--arch/x86/mm/pageattr-test.c9
-rw-r--r--arch/x86/mm/pageattr.c463
-rw-r--r--arch/x86/mm/pat.c132
-rw-r--r--arch/x86/mm/pgtable.c6
-rw-r--r--arch/x86/mm/pgtable_32.c3
-rw-r--r--arch/x86/oprofile/nmi_int.c4
-rw-r--r--arch/x86/oprofile/op_model_p4.c175
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/irq.c67
-rw-r--r--arch/x86/power/hibernate_asm_32.S14
-rw-r--r--arch/x86/xen/enlighten.c20
-rw-r--r--arch/x86/xen/setup.c2
175 files changed, 4078 insertions, 2298 deletions
diff --git a/arch/arm/include/asm/cnt32_to_63.h b/arch/arm/include/asm/cnt32_to_63.h
deleted file mode 100644
index 480c873fa746..000000000000
--- a/arch/arm/include/asm/cnt32_to_63.h
+++ /dev/null
@@ -1,78 +0,0 @@
1/*
2 * include/asm/cnt32_to_63.h -- extend a 32-bit counter to 63 bits
3 *
4 * Author: Nicolas Pitre
5 * Created: December 3, 2006
6 * Copyright: MontaVista Software, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation.
11 */
12
13#ifndef __INCLUDE_CNT32_TO_63_H__
14#define __INCLUDE_CNT32_TO_63_H__
15
16#include <linux/compiler.h>
17#include <asm/types.h>
18#include <asm/byteorder.h>
19
20/*
21 * Prototype: u64 cnt32_to_63(u32 cnt)
22 * Many hardware clock counters are only 32 bits wide and therefore have
23 * a relatively short period making wrap-arounds rather frequent. This
24 * is a problem when implementing sched_clock() for example, where a 64-bit
25 * non-wrapping monotonic value is expected to be returned.
26 *
27 * To overcome that limitation, let's extend a 32-bit counter to 63 bits
28 * in a completely lock free fashion. Bits 0 to 31 of the clock are provided
29 * by the hardware while bits 32 to 62 are stored in memory. The top bit in
30 * memory is used to synchronize with the hardware clock half-period. When
31 * the top bit of both counters (hardware and in memory) differ then the
32 * memory is updated with a new value, incrementing it when the hardware
33 * counter wraps around.
34 *
35 * Because a word store in memory is atomic then the incremented value will
36 * always be in synch with the top bit indicating to any potential concurrent
37 * reader if the value in memory is up to date or not with regards to the
38 * needed increment. And any race in updating the value in memory is harmless
39 * as the same value would simply be stored more than once.
40 *
41 * The only restriction for the algorithm to work properly is that this
42 * code must be executed at least once per each half period of the 32-bit
43 * counter to properly update the state bit in memory. This is usually not a
44 * problem in practice, but if it is then a kernel timer could be scheduled
45 * to manage for this code to be executed often enough.
46 *
47 * Note that the top bit (bit 63) in the returned value should be considered
48 * as garbage. It is not cleared here because callers are likely to use a
49 * multiplier on the returned value which can get rid of the top bit
50 * implicitly by making the multiplier even, therefore saving on a runtime
51 * clear-bit instruction. Otherwise caller must remember to clear the top
52 * bit explicitly.
53 */
54
55/* this is used only to give gcc a clue about good code generation */
56typedef union {
57 struct {
58#if defined(__LITTLE_ENDIAN)
59 u32 lo, hi;
60#elif defined(__BIG_ENDIAN)
61 u32 hi, lo;
62#endif
63 };
64 u64 val;
65} cnt32_to_63_t;
66
67#define cnt32_to_63(cnt_lo) \
68({ \
69 static volatile u32 __m_cnt_hi = 0; \
70 cnt32_to_63_t __x; \
71 __x.hi = __m_cnt_hi; \
72 __x.lo = (cnt_lo); \
73 if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \
74 __m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \
75 __x.val; \
76})
77
78#endif
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index aaffaecffcd1..ba8ccfede964 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -111,8 +111,6 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
111 case 'D': 111 case 'D':
112 case 'k': 112 case 'k':
113 case 'c': 113 case 'c':
114 kgdb_contthread = NULL;
115
116 /* 114 /*
117 * Try to read optional parameter, pc unchanged if no parm. 115 * Try to read optional parameter, pc unchanged if no parm.
118 * If this was a compiled breakpoint, we need to move 116 * If this was a compiled breakpoint, we need to move
diff --git a/arch/arm/mach-davinci/psc.c b/arch/arm/mach-davinci/psc.c
index 720c48b9ee04..aa2fc375a325 100644
--- a/arch/arm/mach-davinci/psc.c
+++ b/arch/arm/mach-davinci/psc.c
@@ -70,9 +70,6 @@ void davinci_psc_config(unsigned int domain, unsigned int id, char enable)
70{ 70{
71 u32 epcpr, ptcmd, ptstat, pdstat, pdctl1, mdstat, mdctl, mdstat_mask; 71 u32 epcpr, ptcmd, ptstat, pdstat, pdctl1, mdstat, mdctl, mdstat_mask;
72 72
73 if (id < 0)
74 return;
75
76 mdctl = davinci_readl(DAVINCI_PWR_SLEEP_CNTRL_BASE + MDCTL + 4 * id); 73 mdctl = davinci_readl(DAVINCI_PWR_SLEEP_CNTRL_BASE + MDCTL + 4 * id);
77 if (enable) 74 if (enable)
78 mdctl |= 0x00000003; /* Enable Module */ 75 mdctl |= 0x00000003; /* Enable Module */
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 67e18509d7bf..b0d6b32654cf 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -17,9 +17,9 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/clockchips.h> 18#include <linux/clockchips.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/cnt32_to_63.h>
20 21
21#include <asm/div64.h> 22#include <asm/div64.h>
22#include <asm/cnt32_to_63.h>
23#include <asm/mach/irq.h> 23#include <asm/mach/irq.h>
24#include <asm/mach/time.h> 24#include <asm/mach/time.h>
25#include <mach/pxa-regs.h> 25#include <mach/pxa-regs.h>
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 1362994c78aa..b422526f6d8b 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -18,9 +18,9 @@
18#include <linux/ioport.h> 18#include <linux/ioport.h>
19#include <linux/sched.h> /* just for sched_clock() - funny that */ 19#include <linux/sched.h> /* just for sched_clock() - funny that */
20#include <linux/platform_device.h> 20#include <linux/platform_device.h>
21#include <linux/cnt32_to_63.h>
21 22
22#include <asm/div64.h> 23#include <asm/div64.h>
23#include <asm/cnt32_to_63.h>
24#include <mach/hardware.h> 24#include <mach/hardware.h>
25#include <asm/system.h> 25#include <asm/system.h>
26#include <asm/pgtable.h> 26#include <asm/pgtable.h>
diff --git a/arch/arm/mach-sa1100/include/mach/jornada720.h b/arch/arm/mach-sa1100/include/mach/jornada720.h
index bc120850d313..cc6b4bfcecf6 100644
--- a/arch/arm/mach-sa1100/include/mach/jornada720.h
+++ b/arch/arm/mach-sa1100/include/mach/jornada720.h
@@ -1,10 +1,10 @@
1/* 1/*
2 * arch/arm/mach-sa1100/include/mach/jornada720.h 2 * arch/arm/mach-sa1100/include/mach/jornada720.h
3 * 3 *
4 * This file contains SSP/MCU communication definitions for HP Jornada 710/720/728 4 * SSP/MCU communication definitions for HP Jornada 710/720/728
5 * 5 *
6 * Copyright (C) 2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com> 6 * Copyright 2007,2008 Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
7 * Copyright (C) 2000 John Ankcorn <jca@lcs.mit.edu> 7 * Copyright 2000 John Ankcorn <jca@lcs.mit.edu>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -25,3 +25,8 @@
25#define PWMOFF 0xDF 25#define PWMOFF 0xDF
26#define TXDUMMY 0x11 26#define TXDUMMY 0x11
27#define ERRORCODE 0x00 27#define ERRORCODE 0x00
28
29extern void jornada_ssp_start(void);
30extern void jornada_ssp_end(void);
31extern int jornada_ssp_inout(u8 byte);
32extern int jornada_ssp_byte(u8 byte);
diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
index 06ea7abd9170..28cf36967977 100644
--- a/arch/arm/mach-sa1100/jornada720_ssp.c
+++ b/arch/arm/mach-sa1100/jornada720_ssp.c
@@ -21,8 +21,8 @@
21#include <linux/slab.h> 21#include <linux/slab.h>
22 22
23#include <mach/hardware.h> 23#include <mach/hardware.h>
24#include <asm/hardware/ssp.h>
25#include <mach/jornada720.h> 24#include <mach/jornada720.h>
25#include <asm/hardware/ssp.h>
26 26
27static DEFINE_SPINLOCK(jornada_ssp_lock); 27static DEFINE_SPINLOCK(jornada_ssp_lock);
28static unsigned long jornada_ssp_flags; 28static unsigned long jornada_ssp_flags;
@@ -109,12 +109,12 @@ EXPORT_SYMBOL(jornada_ssp_inout);
109 * jornada_ssp_start - enable mcu 109 * jornada_ssp_start - enable mcu
110 * 110 *
111 */ 111 */
112int jornada_ssp_start() 112void jornada_ssp_start(void)
113{ 113{
114 spin_lock_irqsave(&jornada_ssp_lock, jornada_ssp_flags); 114 spin_lock_irqsave(&jornada_ssp_lock, jornada_ssp_flags);
115 GPCR = GPIO_GPIO25; 115 GPCR = GPIO_GPIO25;
116 udelay(50); 116 udelay(50);
117 return 0; 117 return;
118}; 118};
119EXPORT_SYMBOL(jornada_ssp_start); 119EXPORT_SYMBOL(jornada_ssp_start);
120 120
@@ -122,11 +122,11 @@ EXPORT_SYMBOL(jornada_ssp_start);
122 * jornada_ssp_end - disable mcu and turn off lock 122 * jornada_ssp_end - disable mcu and turn off lock
123 * 123 *
124 */ 124 */
125int jornada_ssp_end() 125void jornada_ssp_end(void)
126{ 126{
127 GPSR = GPIO_GPIO25; 127 GPSR = GPIO_GPIO25;
128 spin_unlock_irqrestore(&jornada_ssp_lock, jornada_ssp_flags); 128 spin_unlock_irqrestore(&jornada_ssp_lock, jornada_ssp_flags);
129 return 0; 129 return;
130}; 130};
131EXPORT_SYMBOL(jornada_ssp_end); 131EXPORT_SYMBOL(jornada_ssp_end);
132 132
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index d75e795c893e..b638f10411e8 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -28,8 +28,8 @@
28#include <linux/amba/clcd.h> 28#include <linux/amba/clcd.h>
29#include <linux/clocksource.h> 29#include <linux/clocksource.h>
30#include <linux/clockchips.h> 30#include <linux/clockchips.h>
31#include <linux/cnt32_to_63.h>
31 32
32#include <asm/cnt32_to_63.h>
33#include <asm/system.h> 33#include <asm/system.h>
34#include <mach/hardware.h> 34#include <mach/hardware.h>
35#include <asm/io.h> 35#include <asm/io.h>
diff --git a/arch/arm/plat-omap/devices.c b/arch/arm/plat-omap/devices.c
index bc1cf30c83e0..01da719a7453 100644
--- a/arch/arm/plat-omap/devices.c
+++ b/arch/arm/plat-omap/devices.c
@@ -316,19 +316,6 @@ static inline void omap_init_mmc_conf(const struct omap_mmc_config *mmc_conf)
316 omap_cfg_reg(MMC_DAT3); 316 omap_cfg_reg(MMC_DAT3);
317 } 317 }
318 } 318 }
319#if defined(CONFIG_ARCH_OMAP2420)
320 if (mmc_conf->mmc[0].internal_clock) {
321 /*
322 * Use internal loop-back in MMC/SDIO
323 * Module Input Clock selection
324 */
325 if (cpu_is_omap24xx()) {
326 u32 v = omap_ctrl_readl(OMAP2_CONTROL_DEVCONF0);
327 v |= (1 << 24); /* not used in 243x */
328 omap_ctrl_writel(v, OMAP2_CONTROL_DEVCONF0);
329 }
330 }
331#endif
332 } 319 }
333 320
334#ifdef CONFIG_ARCH_OMAP16XX 321#ifdef CONFIG_ARCH_OMAP16XX
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
index ee4c292683e1..dfc3443e23aa 100644
--- a/arch/avr32/boards/atstk1000/atstk1002.c
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -325,7 +325,7 @@ static int __init atstk1002_init(void)
325#ifdef CONFIG_BOARD_ATSTK100X_SPI1 325#ifdef CONFIG_BOARD_ATSTK100X_SPI1
326 at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info)); 326 at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
327#endif 327#endif
328#ifndef CONFIG_BOARD_ATSTK1002_SW2_CUSTOM 328#ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
329 at32_add_device_mci(0, MCI_PDATA); 329 at32_add_device_mci(0, MCI_PDATA);
330#endif 330#endif
331#ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM 331#ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM
diff --git a/arch/avr32/boot/images/.gitignore b/arch/avr32/boot/images/.gitignore
new file mode 100644
index 000000000000..64ea9d0141d2
--- /dev/null
+++ b/arch/avr32/boot/images/.gitignore
@@ -0,0 +1,4 @@
1uImage
2uImage.srec
3vmlinux.cso
4sfdwarf.log
diff --git a/arch/avr32/kernel/.gitignore b/arch/avr32/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/avr32/kernel/.gitignore
@@ -0,0 +1 @@
vmlinux.lds
diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c
index 84a7d44edc67..11e310c567a9 100644
--- a/arch/avr32/kernel/avr32_ksyms.c
+++ b/arch/avr32/kernel/avr32_ksyms.c
@@ -58,6 +58,7 @@ EXPORT_SYMBOL(find_first_zero_bit);
58EXPORT_SYMBOL(find_next_zero_bit); 58EXPORT_SYMBOL(find_next_zero_bit);
59EXPORT_SYMBOL(find_first_bit); 59EXPORT_SYMBOL(find_first_bit);
60EXPORT_SYMBOL(find_next_bit); 60EXPORT_SYMBOL(find_next_bit);
61EXPORT_SYMBOL(generic_find_next_le_bit);
61EXPORT_SYMBOL(generic_find_next_zero_le_bit); 62EXPORT_SYMBOL(generic_find_next_zero_le_bit);
62 63
63/* I/O primitives (lib/io-*.S) */ 64/* I/O primitives (lib/io-*.S) */
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
index 890286a1e62b..673178e235f3 100644
--- a/arch/avr32/kernel/syscall-stubs.S
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -109,3 +109,12 @@ __sys_epoll_pwait:
109 rcall sys_epoll_pwait 109 rcall sys_epoll_pwait
110 sub sp, -4 110 sub sp, -4
111 popm pc 111 popm pc
112
113 .global __sys_sync_file_range
114 .type __sys_sync_file_range,@function
115__sys_sync_file_range:
116 pushm lr
117 st.w --sp, ARG6
118 rcall sys_sync_file_range
119 sub sp, -4
120 popm pc
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index 478bda4c4a09..7ee0057613b3 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -275,7 +275,7 @@ sys_call_table:
275 .long sys_set_robust_list 275 .long sys_set_robust_list
276 .long sys_get_robust_list /* 260 */ 276 .long sys_get_robust_list /* 260 */
277 .long __sys_splice 277 .long __sys_splice
278 .long sys_sync_file_range 278 .long __sys_sync_file_range
279 .long sys_tee 279 .long sys_tee
280 .long sys_vmsplice 280 .long sys_vmsplice
281 .long __sys_epoll_pwait /* 265 */ 281 .long __sys_epoll_pwait /* 265 */
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index b835c4c01368..0d987373bc01 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -116,15 +116,15 @@ asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs)
116 switch (ret) { 116 switch (ret) {
117 case NOTIFY_OK: 117 case NOTIFY_OK:
118 case NOTIFY_STOP: 118 case NOTIFY_STOP:
119 return; 119 break;
120 case NOTIFY_BAD: 120 case NOTIFY_BAD:
121 die("Fatal Non-Maskable Interrupt", regs, SIGINT); 121 die("Fatal Non-Maskable Interrupt", regs, SIGINT);
122 default: 122 default:
123 printk(KERN_ALERT "Got NMI, but nobody cared. Disabling...\n");
124 nmi_disable();
123 break; 125 break;
124 } 126 }
125 127 nmi_exit();
126 printk(KERN_ALERT "Got NMI, but nobody cared. Disabling...\n");
127 nmi_disable();
128} 128}
129 129
130asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs) 130asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs)
diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S
index c6b91dee857c..997b33b2288a 100644
--- a/arch/avr32/lib/findbit.S
+++ b/arch/avr32/lib/findbit.S
@@ -123,6 +123,36 @@ ENTRY(find_next_bit)
123 brgt 1b 123 brgt 1b
124 retal r11 124 retal r11
125 125
126ENTRY(generic_find_next_le_bit)
127 lsr r8, r10, 5
128 sub r9, r11, r10
129 retle r11
130
131 lsl r8, 2
132 add r12, r8
133 andl r10, 31, COH
134 breq 1f
135
136 /* offset is not word-aligned. Handle the first (32 - r10) bits */
137 ldswp.w r8, r12[0]
138 sub r12, -4
139 lsr r8, r8, r10
140 brne .L_found
141
142 /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
143 add r9, r10
144 sub r9, 32
145 retle r11
146
147 /* Main loop. offset must be word-aligned */
1481: ldswp.w r8, r12[0]
149 cp.w r8, 0
150 brne .L_found
151 sub r12, -4
152 sub r9, 32
153 brgt 1b
154 retal r11
155
126ENTRY(generic_find_next_zero_le_bit) 156ENTRY(generic_find_next_zero_le_bit)
127 lsr r8, r10, 5 157 lsr r8, r10, 5
128 sub r9, r11, r10 158 sub r9, r11, r10
diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index f66799891036..1a873b36a4a1 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -11,6 +11,9 @@
11#include <asm-generic/sections.h> 11#include <asm-generic/sections.h>
12 12
13extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[]; 13extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
14#ifdef CONFIG_SMP
15extern char __cpu0_per_cpu[];
16#endif
14extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; 17extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
15extern char __start___rse_patchlist[], __end___rse_patchlist[]; 18extern char __start___rse_patchlist[], __end___rse_patchlist[];
16extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; 19extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index d45f215bc8fc..51b75cea7018 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1232,9 +1232,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
1232 if (md->attribute & EFI_MEMORY_WP) { 1232 if (md->attribute & EFI_MEMORY_WP) {
1233 name = "System ROM"; 1233 name = "System ROM";
1234 flags |= IORESOURCE_READONLY; 1234 flags |= IORESOURCE_READONLY;
1235 } else { 1235 } else if (md->attribute == EFI_MEMORY_UC)
1236 name = "Uncached RAM";
1237 else
1236 name = "System RAM"; 1238 name = "System RAM";
1237 }
1238 break; 1239 break;
1239 1240
1240 case EFI_ACPI_MEMORY_NVS: 1241 case EFI_ACPI_MEMORY_NVS:
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 8bdea8eb62e3..66e491d8baac 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -367,16 +367,17 @@ start_ap:
367 ;; 367 ;;
368#else 368#else
369(isAP) br.few 2f 369(isAP) br.few 2f
370 mov r20=r19 370 movl r20=__cpu0_per_cpu
371 sub r19=r19,r18
372 ;; 371 ;;
373 shr.u r18=r18,3 372 shr.u r18=r18,3
3741: 3731:
375 ld8 r21=[r20],8;; 374 ld8 r21=[r19],8;;
376 st8[r19]=r21,8 375 st8[r20]=r21,8
377 adds r18=-1,r18;; 376 adds r18=-1,r18;;
378 cmp4.lt p7,p6=0,r18 377 cmp4.lt p7,p6=0,r18
379(p7) br.cond.dptk.few 1b 378(p7) br.cond.dptk.few 1b
379 mov r19=r20
380 ;;
3802: 3812:
381#endif 382#endif
382 tpa r19=r19 383 tpa r19=r19
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c27d5b2c182b..de636b215677 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -616,7 +616,9 @@ setup_arch (char **cmdline_p)
616 ia64_mca_init(); 616 ia64_mca_init();
617 617
618 platform_setup(cmdline_p); 618 platform_setup(cmdline_p);
619#ifndef CONFIG_IA64_HP_SIM
619 check_sal_cache_flush(); 620 check_sal_cache_flush();
621#endif
620 paging_init(); 622 paging_init();
621} 623}
622 624
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index de71da811cd6..10a7d47e8510 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -215,9 +215,6 @@ SECTIONS
215 /* Per-cpu data: */ 215 /* Per-cpu data: */
216 percpu : { } :percpu 216 percpu : { } :percpu
217 . = ALIGN(PERCPU_PAGE_SIZE); 217 . = ALIGN(PERCPU_PAGE_SIZE);
218#ifdef CONFIG_SMP
219 . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
220#endif
221 __phys_per_cpu_start = .; 218 __phys_per_cpu_start = .;
222 .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) 219 .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
223 { 220 {
@@ -233,6 +230,11 @@ SECTIONS
233 data : { } :data 230 data : { } :data
234 .data : AT(ADDR(.data) - LOAD_OFFSET) 231 .data : AT(ADDR(.data) - LOAD_OFFSET)
235 { 232 {
233#ifdef CONFIG_SMP
234 . = ALIGN(PERCPU_PAGE_SIZE);
235 __cpu0_per_cpu = .;
236 . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
237#endif
236 DATA_DATA 238 DATA_DATA
237 *(.data1) 239 *(.data1)
238 *(.gnu.linkonce.d*) 240 *(.gnu.linkonce.d*)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 7a37d06376be..cd0d1a7284b7 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -38,6 +38,7 @@
38#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
39#include <asm/div64.h> 39#include <asm/div64.h>
40#include <asm/tlb.h> 40#include <asm/tlb.h>
41#include <asm/elf.h>
41 42
42#include "misc.h" 43#include "misc.h"
43#include "vti.h" 44#include "vti.h"
@@ -61,12 +62,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { NULL } 62 { NULL }
62}; 63};
63 64
64
65struct fdesc{
66 unsigned long ip;
67 unsigned long gp;
68};
69
70static void kvm_flush_icache(unsigned long start, unsigned long len) 65static void kvm_flush_icache(unsigned long start, unsigned long len)
71{ 66{
72 int l; 67 int l;
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index e566ff43884a..0ee085efbe29 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -163,7 +163,7 @@ per_cpu_init (void)
163 * get_zeroed_page(). 163 * get_zeroed_page().
164 */ 164 */
165 if (first_time) { 165 if (first_time) {
166 void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE; 166 void *cpu0_data = __cpu0_per_cpu;
167 167
168 first_time=0; 168 first_time=0;
169 169
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 78026aabaa7f..d8c5fcd89e5b 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -144,7 +144,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
144 144
145 for_each_possible_early_cpu(cpu) { 145 for_each_possible_early_cpu(cpu) {
146 if (cpu == 0) { 146 if (cpu == 0) {
147 void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE; 147 void *cpu0_data = __cpu0_per_cpu;
148 __per_cpu_offset[cpu] = (char*)cpu0_data - 148 __per_cpu_offset[cpu] = (char*)cpu0_data -
149 __per_cpu_start; 149 __per_cpu_start;
150 } else if (node == node_cpuid[cpu].nid) { 150 } else if (node == node_cpuid[cpu].nid) {
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index a5f864c445b2..f57113f1f892 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -216,10 +216,6 @@ config MEMORY_SIZE
216 default "01000000" if PLAT_M32104UT 216 default "01000000" if PLAT_M32104UT
217 default "00800000" if PLAT_OAKS32R 217 default "00800000" if PLAT_OAKS32R
218 218
219config NOHIGHMEM
220 bool
221 default y
222
223config ARCH_DISCONTIGMEM_ENABLE 219config ARCH_DISCONTIGMEM_ENABLE
224 bool "Internal RAM Support" 220 bool "Internal RAM Support"
225 depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP || CHIP_M32104 221 depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP || CHIP_M32104
@@ -410,11 +406,7 @@ config PCI_DIRECT
410source "drivers/pci/Kconfig" 406source "drivers/pci/Kconfig"
411 407
412config ISA 408config ISA
413 bool "ISA support" 409 bool
414 help
415 Find out whether you have ISA slots on your motherboard. ISA is the
416 name of a bus system, i.e. the way the CPU talks to the other stuff
417 inside your box. If you have ISA, say Y, otherwise N.
418 410
419source "drivers/pcmcia/Kconfig" 411source "drivers/pcmcia/Kconfig"
420 412
diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S
index d4eaa2fd1818..612d35b082a6 100644
--- a/arch/m32r/kernel/entry.S
+++ b/arch/m32r/kernel/entry.S
@@ -143,7 +143,7 @@ ret_from_intr:
143 and3 r4, r4, #0x8000 ; check BSM bit 143 and3 r4, r4, #0x8000 ; check BSM bit
144#endif 144#endif
145 beqz r4, resume_kernel 145 beqz r4, resume_kernel
146ENTRY(resume_userspace) 146resume_userspace:
147 DISABLE_INTERRUPTS(r4) ; make sure we don't miss an interrupt 147 DISABLE_INTERRUPTS(r4) ; make sure we don't miss an interrupt
148 ; setting need_resched or sigpending 148 ; setting need_resched or sigpending
149 ; between sampling and the iret 149 ; between sampling and the iret
diff --git a/arch/m32r/kernel/head.S b/arch/m32r/kernel/head.S
index dab7436d7bbe..40180778a5c7 100644
--- a/arch/m32r/kernel/head.S
+++ b/arch/m32r/kernel/head.S
@@ -29,7 +29,6 @@ __INITDATA
29 .global _end 29 .global _end
30ENTRY(stext) 30ENTRY(stext)
31ENTRY(_stext) 31ENTRY(_stext)
32ENTRY(startup_32)
33 /* Setup up the stack pointer */ 32 /* Setup up the stack pointer */
34 LDIMM (r0, spi_stack_top) 33 LDIMM (r0, spi_stack_top)
35 LDIMM (r1, spu_stack_top) 34 LDIMM (r1, spu_stack_top)
diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c
index d0c5b0b7da2f..2aeae4670098 100644
--- a/arch/m32r/kernel/irq.c
+++ b/arch/m32r/kernel/irq.c
@@ -22,9 +22,6 @@
22#include <linux/module.h> 22#include <linux/module.h>
23#include <asm/uaccess.h> 23#include <asm/uaccess.h>
24 24
25atomic_t irq_err_count;
26atomic_t irq_mis_count;
27
28/* 25/*
29 * Generic, controller-independent functions: 26 * Generic, controller-independent functions:
30 */ 27 */
@@ -63,9 +60,6 @@ int show_interrupts(struct seq_file *p, void *v)
63 seq_putc(p, '\n'); 60 seq_putc(p, '\n');
64skip: 61skip:
65 spin_unlock_irqrestore(&irq_desc[i].lock, flags); 62 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
66 } else if (i == NR_IRQS) {
67 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
68 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
69 } 63 }
70 return 0; 64 return 0;
71} 65}
diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c
index 16bcb189a383..22624b51d4d3 100644
--- a/arch/m32r/kernel/m32r_ksyms.c
+++ b/arch/m32r/kernel/m32r_ksyms.c
@@ -14,6 +14,7 @@
14#include <asm/delay.h> 14#include <asm/delay.h>
15#include <asm/irq.h> 15#include <asm/irq.h>
16#include <asm/tlbflush.h> 16#include <asm/tlbflush.h>
17#include <asm/pgtable.h>
17 18
18/* platform dependent support */ 19/* platform dependent support */
19EXPORT_SYMBOL(boot_cpu_data); 20EXPORT_SYMBOL(boot_cpu_data);
@@ -65,6 +66,7 @@ EXPORT_SYMBOL(memset);
65EXPORT_SYMBOL(copy_page); 66EXPORT_SYMBOL(copy_page);
66EXPORT_SYMBOL(clear_page); 67EXPORT_SYMBOL(clear_page);
67EXPORT_SYMBOL(strlen); 68EXPORT_SYMBOL(strlen);
69EXPORT_SYMBOL(empty_zero_page);
68 70
69EXPORT_SYMBOL(_inb); 71EXPORT_SYMBOL(_inb);
70EXPORT_SYMBOL(_inw); 72EXPORT_SYMBOL(_inw);
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index a689e2978b6e..5be4faaf5b1c 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -35,8 +35,6 @@
35 35
36#include <linux/err.h> 36#include <linux/err.h>
37 37
38static int hlt_counter=0;
39
40/* 38/*
41 * Return saved PC of a blocked thread. 39 * Return saved PC of a blocked thread.
42 */ 40 */
@@ -48,31 +46,16 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
48/* 46/*
49 * Powermanagement idle function, if any.. 47 * Powermanagement idle function, if any..
50 */ 48 */
51void (*pm_idle)(void) = NULL; 49static void (*pm_idle)(void) = NULL;
52EXPORT_SYMBOL(pm_idle);
53 50
54void (*pm_power_off)(void) = NULL; 51void (*pm_power_off)(void) = NULL;
55EXPORT_SYMBOL(pm_power_off); 52EXPORT_SYMBOL(pm_power_off);
56 53
57void disable_hlt(void)
58{
59 hlt_counter++;
60}
61
62EXPORT_SYMBOL(disable_hlt);
63
64void enable_hlt(void)
65{
66 hlt_counter--;
67}
68
69EXPORT_SYMBOL(enable_hlt);
70
71/* 54/*
72 * We use this is we don't have any better 55 * We use this is we don't have any better
73 * idle routine.. 56 * idle routine..
74 */ 57 */
75void default_idle(void) 58static void default_idle(void)
76{ 59{
77 /* M32R_FIXME: Please use "cpu_sleep" mode. */ 60 /* M32R_FIXME: Please use "cpu_sleep" mode. */
78 cpu_relax(); 61 cpu_relax();
@@ -260,15 +243,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long spu,
260 return 0; 243 return 0;
261} 244}
262 245
263/*
264 * Capture the user space registers if the task is not running (in user space)
265 */
266int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
267{
268 /* M32R_FIXME */
269 return 1;
270}
271
272asmlinkage int sys_fork(unsigned long r0, unsigned long r1, unsigned long r2, 246asmlinkage int sys_fork(unsigned long r0, unsigned long r1, unsigned long r2,
273 unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, 247 unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6,
274 struct pt_regs regs) 248 struct pt_regs regs)
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 7577f971ea4e..929e5c9d3ad9 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -84,7 +84,7 @@ void smp_send_timer(void);
84void smp_ipi_timer_interrupt(struct pt_regs *); 84void smp_ipi_timer_interrupt(struct pt_regs *);
85void smp_local_timer_interrupt(void); 85void smp_local_timer_interrupt(void);
86 86
87void send_IPI_allbutself(int, int); 87static void send_IPI_allbutself(int, int);
88static void send_IPI_mask(cpumask_t, int, int); 88static void send_IPI_mask(cpumask_t, int, int);
89unsigned long send_IPI_mask_phys(cpumask_t, int, int); 89unsigned long send_IPI_mask_phys(cpumask_t, int, int);
90 90
@@ -722,7 +722,7 @@ void smp_local_timer_interrupt(void)
722 * ---------- --- -------------------------------------------------------- 722 * ---------- --- --------------------------------------------------------
723 * 723 *
724 *==========================================================================*/ 724 *==========================================================================*/
725void send_IPI_allbutself(int ipi_num, int try) 725static void send_IPI_allbutself(int ipi_num, int try)
726{ 726{
727 cpumask_t cpumask; 727 cpumask_t cpumask;
728 728
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index 994cc1556355..6ea017727cce 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -34,7 +34,6 @@
34#include <asm/hw_irq.h> 34#include <asm/hw_irq.h>
35 35
36#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
37extern void send_IPI_allbutself(int, int);
38extern void smp_local_timer_interrupt(void); 37extern void smp_local_timer_interrupt(void);
39#endif 38#endif
40 39
@@ -188,7 +187,7 @@ static long last_rtc_update = 0;
188 * timer_interrupt() needs to keep up the real-time clock, 187 * timer_interrupt() needs to keep up the real-time clock,
189 * as well as call the "do_timer()" routine every clocktick 188 * as well as call the "do_timer()" routine every clocktick
190 */ 189 */
191irqreturn_t timer_interrupt(int irq, void *dev_id) 190static irqreturn_t timer_interrupt(int irq, void *dev_id)
192{ 191{
193#ifndef CONFIG_SMP 192#ifndef CONFIG_SMP
194 profile_tick(CPU_PROFILING); 193 profile_tick(CPU_PROFILING);
@@ -228,7 +227,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
228 return IRQ_HANDLED; 227 return IRQ_HANDLED;
229} 228}
230 229
231struct irqaction irq0 = { 230static struct irqaction irq0 = {
232 .handler = timer_interrupt, 231 .handler = timer_interrupt,
233 .flags = IRQF_DISABLED, 232 .flags = IRQF_DISABLED,
234 .mask = CPU_MASK_NONE, 233 .mask = CPU_MASK_NONE,
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index 46159a4e644b..03b14e55cd89 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -61,7 +61,7 @@ extern unsigned long eit_vector[];
61 ((unsigned long)func - (unsigned long)eit_vector - entry*4)/4 \ 61 ((unsigned long)func - (unsigned long)eit_vector - entry*4)/4 \
62 + 0xff000000UL 62 + 0xff000000UL
63 63
64void set_eit_vector_entries(void) 64static void set_eit_vector_entries(void)
65{ 65{
66 extern void default_eit_handler(void); 66 extern void default_eit_handler(void);
67 extern void system_call(void); 67 extern void system_call(void);
@@ -121,9 +121,9 @@ void __init trap_init(void)
121 cpu_init(); 121 cpu_init();
122} 122}
123 123
124int kstack_depth_to_print = 24; 124static int kstack_depth_to_print = 24;
125 125
126void show_trace(struct task_struct *task, unsigned long *stack) 126static void show_trace(struct task_struct *task, unsigned long *stack)
127{ 127{
128 unsigned long addr; 128 unsigned long addr;
129 129
@@ -224,7 +224,7 @@ bad:
224 printk("\n"); 224 printk("\n");
225} 225}
226 226
227DEFINE_SPINLOCK(die_lock); 227static DEFINE_SPINLOCK(die_lock);
228 228
229void die(const char * str, struct pt_regs * regs, long err) 229void die(const char * str, struct pt_regs * regs, long err)
230{ 230{
diff --git a/arch/m32r/lib/delay.c b/arch/m32r/lib/delay.c
index 59bfc34e0d9f..ced549be80f5 100644
--- a/arch/m32r/lib/delay.c
+++ b/arch/m32r/lib/delay.c
@@ -6,6 +6,7 @@
6 */ 6 */
7 7
8#include <linux/param.h> 8#include <linux/param.h>
9#include <linux/module.h>
9#ifdef CONFIG_SMP 10#ifdef CONFIG_SMP
10#include <linux/sched.h> 11#include <linux/sched.h>
11#include <asm/current.h> 12#include <asm/current.h>
@@ -121,3 +122,4 @@ void __ndelay(unsigned long nsecs)
121{ 122{
122 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 123 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
123} 124}
125EXPORT_SYMBOL(__ndelay);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 49896a2a1d72..1e06d233fa83 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -211,6 +211,7 @@ config MIPS_MALTA
211 select SYS_SUPPORTS_64BIT_KERNEL 211 select SYS_SUPPORTS_64BIT_KERNEL
212 select SYS_SUPPORTS_BIG_ENDIAN 212 select SYS_SUPPORTS_BIG_ENDIAN
213 select SYS_SUPPORTS_LITTLE_ENDIAN 213 select SYS_SUPPORTS_LITTLE_ENDIAN
214 select SYS_SUPPORTS_MIPS_CMP if BROKEN # because SYNC_R4K is broken
214 select SYS_SUPPORTS_MULTITHREADING 215 select SYS_SUPPORTS_MULTITHREADING
215 select SYS_SUPPORTS_SMARTMIPS 216 select SYS_SUPPORTS_SMARTMIPS
216 help 217 help
@@ -1403,7 +1404,6 @@ config MIPS_MT_SMTC
1403 depends on CPU_MIPS32_R2 1404 depends on CPU_MIPS32_R2
1404 #depends on CPU_MIPS64_R2 # once there is hardware ... 1405 #depends on CPU_MIPS64_R2 # once there is hardware ...
1405 depends on SYS_SUPPORTS_MULTITHREADING 1406 depends on SYS_SUPPORTS_MULTITHREADING
1406 select GENERIC_CLOCKEVENTS_BROADCAST
1407 select CPU_MIPSR2_IRQ_VI 1407 select CPU_MIPSR2_IRQ_VI
1408 select CPU_MIPSR2_IRQ_EI 1408 select CPU_MIPSR2_IRQ_EI
1409 select MIPS_MT 1409 select MIPS_MT
@@ -1451,32 +1451,17 @@ config MIPS_VPE_LOADER
1451 Includes a loader for loading an elf relocatable object 1451 Includes a loader for loading an elf relocatable object
1452 onto another VPE and running it. 1452 onto another VPE and running it.
1453 1453
1454config MIPS_MT_SMTC_INSTANT_REPLAY
1455 bool "Low-latency Dispatch of Deferred SMTC IPIs"
1456 depends on MIPS_MT_SMTC && !PREEMPT
1457 default y
1458 help
1459 SMTC pseudo-interrupts between TCs are deferred and queued
1460 if the target TC is interrupt-inhibited (IXMT). In the first
1461 SMTC prototypes, these queued IPIs were serviced on return
1462 to user mode, or on entry into the kernel idle loop. The
1463 INSTANT_REPLAY option dispatches them as part of local_irq_restore()
1464 processing, which adds runtime overhead (hence the option to turn
1465 it off), but ensures that IPIs are handled promptly even under
1466 heavy I/O interrupt load.
1467
1468config MIPS_MT_SMTC_IM_BACKSTOP 1454config MIPS_MT_SMTC_IM_BACKSTOP
1469 bool "Use per-TC register bits as backstop for inhibited IM bits" 1455 bool "Use per-TC register bits as backstop for inhibited IM bits"
1470 depends on MIPS_MT_SMTC 1456 depends on MIPS_MT_SMTC
1471 default y 1457 default n
1472 help 1458 help
1473 To support multiple TC microthreads acting as "CPUs" within 1459 To support multiple TC microthreads acting as "CPUs" within
1474 a VPE, VPE-wide interrupt mask bits must be specially manipulated 1460 a VPE, VPE-wide interrupt mask bits must be specially manipulated
1475 during interrupt handling. To support legacy drivers and interrupt 1461 during interrupt handling. To support legacy drivers and interrupt
1476 controller management code, SMTC has a "backstop" to track and 1462 controller management code, SMTC has a "backstop" to track and
1477 if necessary restore the interrupt mask. This has some performance 1463 if necessary restore the interrupt mask. This has some performance
1478 impact on interrupt service overhead. Disable it only if you know 1464 impact on interrupt service overhead.
1479 what you are doing.
1480 1465
1481config MIPS_MT_SMTC_IRQAFF 1466config MIPS_MT_SMTC_IRQAFF
1482 bool "Support IRQ affinity API" 1467 bool "Support IRQ affinity API"
@@ -1486,10 +1471,8 @@ config MIPS_MT_SMTC_IRQAFF
1486 Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.) 1471 Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.)
1487 for SMTC Linux kernel. Requires platform support, of which 1472 for SMTC Linux kernel. Requires platform support, of which
1488 an example can be found in the MIPS kernel i8259 and Malta 1473 an example can be found in the MIPS kernel i8259 and Malta
1489 platform code. It is recommended that MIPS_MT_SMTC_INSTANT_REPLAY 1474 platform code. Adds some overhead to interrupt dispatch, and
1490 be enabled if MIPS_MT_SMTC_IRQAFF is used. Adds overhead to 1475 should be used only if you know what you are doing.
1491 interrupt dispatch, and should be used only if you know what
1492 you are doing.
1493 1476
1494config MIPS_VPE_LOADER_TOM 1477config MIPS_VPE_LOADER_TOM
1495 bool "Load VPE program into memory hidden from linux" 1478 bool "Load VPE program into memory hidden from linux"
@@ -1517,6 +1500,18 @@ config MIPS_APSP_KSPD
1517 "exit" syscall notifying other kernel modules the SP program is 1500 "exit" syscall notifying other kernel modules the SP program is
1518 exiting. You probably want to say yes here. 1501 exiting. You probably want to say yes here.
1519 1502
1503config MIPS_CMP
1504 bool "MIPS CMP framework support"
1505 depends on SYS_SUPPORTS_MIPS_CMP
1506 select SYNC_R4K if BROKEN
1507 select SYS_SUPPORTS_SMP
1508 select SYS_SUPPORTS_SCHED_SMT if SMP
1509 select WEAK_ORDERING
1510 default n
1511 help
1512 This is a placeholder option for the GCMP work. It will need to
1513 be handled differently...
1514
1520config SB1_PASS_1_WORKAROUNDS 1515config SB1_PASS_1_WORKAROUNDS
1521 bool 1516 bool
1522 depends on CPU_SB1_PASS_1 1517 depends on CPU_SB1_PASS_1
@@ -1693,6 +1688,9 @@ config SMP
1693config SMP_UP 1688config SMP_UP
1694 bool 1689 bool
1695 1690
1691config SYS_SUPPORTS_MIPS_CMP
1692 bool
1693
1696config SYS_SUPPORTS_SMP 1694config SYS_SUPPORTS_SMP
1697 bool 1695 bool
1698 1696
@@ -1740,17 +1738,6 @@ config NR_CPUS
1740 performance should round up your number of processors to the next 1738 performance should round up your number of processors to the next
1741 power of two. 1739 power of two.
1742 1740
1743config MIPS_CMP
1744 bool "MIPS CMP framework support"
1745 depends on SMP
1746 select SYNC_R4K
1747 select SYS_SUPPORTS_SCHED_SMT
1748 select WEAK_ORDERING
1749 default n
1750 help
1751 This is a placeholder option for the GCMP work. It will need to
1752 be handled differently...
1753
1754source "kernel/time/Kconfig" 1741source "kernel/time/Kconfig"
1755 1742
1756# 1743#
diff --git a/arch/mips/au1000/common/gpio.c b/arch/mips/au1000/common/gpio.c
index b485d94ce8a5..e660ddd611c4 100644
--- a/arch/mips/au1000/common/gpio.c
+++ b/arch/mips/au1000/common/gpio.c
@@ -48,7 +48,7 @@ static void au1xxx_gpio2_write(unsigned gpio, int value)
48{ 48{
49 gpio -= AU1XXX_GPIO_BASE; 49 gpio -= AU1XXX_GPIO_BASE;
50 50
51 gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | (value << gpio); 51 gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | ((!!value) << gpio);
52} 52}
53 53
54static int au1xxx_gpio2_direction_input(unsigned gpio) 54static int au1xxx_gpio2_direction_input(unsigned gpio)
@@ -61,7 +61,8 @@ static int au1xxx_gpio2_direction_input(unsigned gpio)
61static int au1xxx_gpio2_direction_output(unsigned gpio, int value) 61static int au1xxx_gpio2_direction_output(unsigned gpio, int value)
62{ 62{
63 gpio -= AU1XXX_GPIO_BASE; 63 gpio -= AU1XXX_GPIO_BASE;
64 gpio2->dir = (0x01 << gpio) | (value << gpio); 64 gpio2->dir |= 0x01 << gpio;
65 gpio2->output = (GPIO2_OUTPUT_ENABLE_MASK << gpio) | ((!!value) << gpio);
65 return 0; 66 return 0;
66} 67}
67 68
@@ -90,6 +91,7 @@ static int au1xxx_gpio1_direction_input(unsigned gpio)
90static int au1xxx_gpio1_direction_output(unsigned gpio, int value) 91static int au1xxx_gpio1_direction_output(unsigned gpio, int value)
91{ 92{
92 gpio1->trioutclr = (0x01 & gpio); 93 gpio1->trioutclr = (0x01 & gpio);
94 au1xxx_gpio1_write(gpio, value);
93 return 0; 95 return 0;
94} 96}
95 97
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 706f93974797..25775cb54000 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
10 10
11obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o 11obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
12obj-$(CONFIG_CEVT_R4K) += cevt-r4k.o 12obj-$(CONFIG_CEVT_R4K) += cevt-r4k.o
13obj-$(CONFIG_MIPS_MT_SMTC) += cevt-smtc.o
13obj-$(CONFIG_CEVT_DS1287) += cevt-ds1287.o 14obj-$(CONFIG_CEVT_DS1287) += cevt-ds1287.o
14obj-$(CONFIG_CEVT_GT641XX) += cevt-gt641xx.o 15obj-$(CONFIG_CEVT_GT641XX) += cevt-gt641xx.o
15obj-$(CONFIG_CEVT_SB1250) += cevt-sb1250.o 16obj-$(CONFIG_CEVT_SB1250) += cevt-sb1250.o
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 24a2d907aa0d..4a4c59f2737a 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -12,6 +12,14 @@
12 12
13#include <asm/smtc_ipi.h> 13#include <asm/smtc_ipi.h>
14#include <asm/time.h> 14#include <asm/time.h>
15#include <asm/cevt-r4k.h>
16
17/*
18 * The SMTC Kernel for the 34K, 1004K, et. al. replaces several
19 * of these routines with SMTC-specific variants.
20 */
21
22#ifndef CONFIG_MIPS_MT_SMTC
15 23
16static int mips_next_event(unsigned long delta, 24static int mips_next_event(unsigned long delta,
17 struct clock_event_device *evt) 25 struct clock_event_device *evt)
@@ -19,60 +27,27 @@ static int mips_next_event(unsigned long delta,
19 unsigned int cnt; 27 unsigned int cnt;
20 int res; 28 int res;
21 29
22#ifdef CONFIG_MIPS_MT_SMTC
23 {
24 unsigned long flags, vpflags;
25 local_irq_save(flags);
26 vpflags = dvpe();
27#endif
28 cnt = read_c0_count(); 30 cnt = read_c0_count();
29 cnt += delta; 31 cnt += delta;
30 write_c0_compare(cnt); 32 write_c0_compare(cnt);
31 res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0; 33 res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0;
32#ifdef CONFIG_MIPS_MT_SMTC
33 evpe(vpflags);
34 local_irq_restore(flags);
35 }
36#endif
37 return res; 34 return res;
38} 35}
39 36
40static void mips_set_mode(enum clock_event_mode mode, 37#endif /* CONFIG_MIPS_MT_SMTC */
41 struct clock_event_device *evt) 38
39void mips_set_clock_mode(enum clock_event_mode mode,
40 struct clock_event_device *evt)
42{ 41{
43 /* Nothing to do ... */ 42 /* Nothing to do ... */
44} 43}
45 44
46static DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device); 45DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
47static int cp0_timer_irq_installed; 46int cp0_timer_irq_installed;
48 47
49/* 48#ifndef CONFIG_MIPS_MT_SMTC
50 * Timer ack for an R4k-compatible timer of a known frequency.
51 */
52static void c0_timer_ack(void)
53{
54 write_c0_compare(read_c0_compare());
55}
56 49
57/* 50irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
58 * Possibly handle a performance counter interrupt.
59 * Return true if the timer interrupt should not be checked
60 */
61static inline int handle_perf_irq(int r2)
62{
63 /*
64 * The performance counter overflow interrupt may be shared with the
65 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
66 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
67 * and we can't reliably determine if a counter interrupt has also
68 * happened (!r2) then don't check for a timer interrupt.
69 */
70 return (cp0_perfcount_irq < 0) &&
71 perf_irq() == IRQ_HANDLED &&
72 !r2;
73}
74
75static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
76{ 51{
77 const int r2 = cpu_has_mips_r2; 52 const int r2 = cpu_has_mips_r2;
78 struct clock_event_device *cd; 53 struct clock_event_device *cd;
@@ -93,12 +68,8 @@ static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
93 * interrupt. Being the paranoiacs we are we check anyway. 68 * interrupt. Being the paranoiacs we are we check anyway.
94 */ 69 */
95 if (!r2 || (read_c0_cause() & (1 << 30))) { 70 if (!r2 || (read_c0_cause() & (1 << 30))) {
96 c0_timer_ack(); 71 /* Clear Count/Compare Interrupt */
97#ifdef CONFIG_MIPS_MT_SMTC 72 write_c0_compare(read_c0_compare());
98 if (cpu_data[cpu].vpe_id)
99 goto out;
100 cpu = 0;
101#endif
102 cd = &per_cpu(mips_clockevent_device, cpu); 73 cd = &per_cpu(mips_clockevent_device, cpu);
103 cd->event_handler(cd); 74 cd->event_handler(cd);
104 } 75 }
@@ -107,65 +78,16 @@ out:
107 return IRQ_HANDLED; 78 return IRQ_HANDLED;
108} 79}
109 80
110static struct irqaction c0_compare_irqaction = { 81#endif /* Not CONFIG_MIPS_MT_SMTC */
82
83struct irqaction c0_compare_irqaction = {
111 .handler = c0_compare_interrupt, 84 .handler = c0_compare_interrupt,
112#ifdef CONFIG_MIPS_MT_SMTC
113 .flags = IRQF_DISABLED,
114#else
115 .flags = IRQF_DISABLED | IRQF_PERCPU, 85 .flags = IRQF_DISABLED | IRQF_PERCPU,
116#endif
117 .name = "timer", 86 .name = "timer",
118}; 87};
119 88
120#ifdef CONFIG_MIPS_MT_SMTC
121DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
122
123static void smtc_set_mode(enum clock_event_mode mode,
124 struct clock_event_device *evt)
125{
126}
127
128static void mips_broadcast(cpumask_t mask)
129{
130 unsigned int cpu;
131
132 for_each_cpu_mask(cpu, mask)
133 smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
134}
135
136static void setup_smtc_dummy_clockevent_device(void)
137{
138 //uint64_t mips_freq = mips_hpt_^frequency;
139 unsigned int cpu = smp_processor_id();
140 struct clock_event_device *cd;
141 89
142 cd = &per_cpu(smtc_dummy_clockevent_device, cpu); 90void mips_event_handler(struct clock_event_device *dev)
143
144 cd->name = "SMTC";
145 cd->features = CLOCK_EVT_FEAT_DUMMY;
146
147 /* Calculate the min / max delta */
148 cd->mult = 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
149 cd->shift = 0; //32;
150 cd->max_delta_ns = 0; //clockevent_delta2ns(0x7fffffff, cd);
151 cd->min_delta_ns = 0; //clockevent_delta2ns(0x30, cd);
152
153 cd->rating = 200;
154 cd->irq = 17; //-1;
155// if (cpu)
156// cd->cpumask = CPU_MASK_ALL; // cpumask_of_cpu(cpu);
157// else
158 cd->cpumask = cpumask_of_cpu(cpu);
159
160 cd->set_mode = smtc_set_mode;
161
162 cd->broadcast = mips_broadcast;
163
164 clockevents_register_device(cd);
165}
166#endif
167
168static void mips_event_handler(struct clock_event_device *dev)
169{ 91{
170} 92}
171 93
@@ -177,7 +99,23 @@ static int c0_compare_int_pending(void)
177 return (read_c0_cause() >> cp0_compare_irq) & 0x100; 99 return (read_c0_cause() >> cp0_compare_irq) & 0x100;
178} 100}
179 101
180static int c0_compare_int_usable(void) 102/*
103 * Compare interrupt can be routed and latched outside the core,
104 * so a single execution hazard barrier may not be enough to give
105 * it time to clear as seen in the Cause register. 4 time the
106 * pipeline depth seems reasonably conservative, and empirically
107 * works better in configurations with high CPU/bus clock ratios.
108 */
109
110#define compare_change_hazard() \
111 do { \
112 irq_disable_hazard(); \
113 irq_disable_hazard(); \
114 irq_disable_hazard(); \
115 irq_disable_hazard(); \
116 } while (0)
117
118int c0_compare_int_usable(void)
181{ 119{
182 unsigned int delta; 120 unsigned int delta;
183 unsigned int cnt; 121 unsigned int cnt;
@@ -187,7 +125,7 @@ static int c0_compare_int_usable(void)
187 */ 125 */
188 if (c0_compare_int_pending()) { 126 if (c0_compare_int_pending()) {
189 write_c0_compare(read_c0_count()); 127 write_c0_compare(read_c0_count());
190 irq_disable_hazard(); 128 compare_change_hazard();
191 if (c0_compare_int_pending()) 129 if (c0_compare_int_pending())
192 return 0; 130 return 0;
193 } 131 }
@@ -196,7 +134,7 @@ static int c0_compare_int_usable(void)
196 cnt = read_c0_count(); 134 cnt = read_c0_count();
197 cnt += delta; 135 cnt += delta;
198 write_c0_compare(cnt); 136 write_c0_compare(cnt);
199 irq_disable_hazard(); 137 compare_change_hazard();
200 if ((int)(read_c0_count() - cnt) < 0) 138 if ((int)(read_c0_count() - cnt) < 0)
201 break; 139 break;
202 /* increase delta if the timer was already expired */ 140 /* increase delta if the timer was already expired */
@@ -205,11 +143,12 @@ static int c0_compare_int_usable(void)
205 while ((int)(read_c0_count() - cnt) <= 0) 143 while ((int)(read_c0_count() - cnt) <= 0)
206 ; /* Wait for expiry */ 144 ; /* Wait for expiry */
207 145
146 compare_change_hazard();
208 if (!c0_compare_int_pending()) 147 if (!c0_compare_int_pending())
209 return 0; 148 return 0;
210 149
211 write_c0_compare(read_c0_count()); 150 write_c0_compare(read_c0_count());
212 irq_disable_hazard(); 151 compare_change_hazard();
213 if (c0_compare_int_pending()) 152 if (c0_compare_int_pending())
214 return 0; 153 return 0;
215 154
@@ -219,6 +158,8 @@ static int c0_compare_int_usable(void)
219 return 1; 158 return 1;
220} 159}
221 160
161#ifndef CONFIG_MIPS_MT_SMTC
162
222int __cpuinit mips_clockevent_init(void) 163int __cpuinit mips_clockevent_init(void)
223{ 164{
224 uint64_t mips_freq = mips_hpt_frequency; 165 uint64_t mips_freq = mips_hpt_frequency;
@@ -229,17 +170,6 @@ int __cpuinit mips_clockevent_init(void)
229 if (!cpu_has_counter || !mips_hpt_frequency) 170 if (!cpu_has_counter || !mips_hpt_frequency)
230 return -ENXIO; 171 return -ENXIO;
231 172
232#ifdef CONFIG_MIPS_MT_SMTC
233 setup_smtc_dummy_clockevent_device();
234
235 /*
236 * On SMTC we only register VPE0's compare interrupt as clockevent
237 * device.
238 */
239 if (cpu)
240 return 0;
241#endif
242
243 if (!c0_compare_int_usable()) 173 if (!c0_compare_int_usable())
244 return -ENXIO; 174 return -ENXIO;
245 175
@@ -265,13 +195,9 @@ int __cpuinit mips_clockevent_init(void)
265 195
266 cd->rating = 300; 196 cd->rating = 300;
267 cd->irq = irq; 197 cd->irq = irq;
268#ifdef CONFIG_MIPS_MT_SMTC
269 cd->cpumask = CPU_MASK_ALL;
270#else
271 cd->cpumask = cpumask_of_cpu(cpu); 198 cd->cpumask = cpumask_of_cpu(cpu);
272#endif
273 cd->set_next_event = mips_next_event; 199 cd->set_next_event = mips_next_event;
274 cd->set_mode = mips_set_mode; 200 cd->set_mode = mips_set_clock_mode;
275 cd->event_handler = mips_event_handler; 201 cd->event_handler = mips_event_handler;
276 202
277 clockevents_register_device(cd); 203 clockevents_register_device(cd);
@@ -281,12 +207,9 @@ int __cpuinit mips_clockevent_init(void)
281 207
282 cp0_timer_irq_installed = 1; 208 cp0_timer_irq_installed = 1;
283 209
284#ifdef CONFIG_MIPS_MT_SMTC
285#define CPUCTR_IMASKBIT (0x100 << cp0_compare_irq)
286 setup_irq_smtc(irq, &c0_compare_irqaction, CPUCTR_IMASKBIT);
287#else
288 setup_irq(irq, &c0_compare_irqaction); 210 setup_irq(irq, &c0_compare_irqaction);
289#endif
290 211
291 return 0; 212 return 0;
292} 213}
214
215#endif /* Not CONFIG_MIPS_MT_SMTC */
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
new file mode 100644
index 000000000000..5162fe4b5952
--- /dev/null
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -0,0 +1,321 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2007 MIPS Technologies, Inc.
7 * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
8 * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl
9 */
10#include <linux/clockchips.h>
11#include <linux/interrupt.h>
12#include <linux/percpu.h>
13
14#include <asm/smtc_ipi.h>
15#include <asm/time.h>
16#include <asm/cevt-r4k.h>
17
18/*
19 * Variant clock event timer support for SMTC on MIPS 34K, 1004K
20 * or other MIPS MT cores.
21 *
22 * Notes on SMTC Support:
23 *
24 * SMTC has multiple microthread TCs pretending to be Linux CPUs.
25 * But there's only one Count/Compare pair per VPE, and Compare
26 * interrupts are taken opportunisitically by available TCs
27 * bound to the VPE with the Count register. The new timer
28 * framework provides for global broadcasts, but we really
29 * want VPE-level multicasts for best behavior. So instead
30 * of invoking the high-level clock-event broadcast code,
31 * this version of SMTC support uses the historical SMTC
32 * multicast mechanisms "under the hood", appearing to the
33 * generic clock layer as if the interrupts are per-CPU.
34 *
35 * The approach taken here is to maintain a set of NR_CPUS
36 * virtual timers, and track which "CPU" needs to be alerted
37 * at each event.
38 *
39 * It's unlikely that we'll see a MIPS MT core with more than
40 * 2 VPEs, but we *know* that we won't need to handle more
41 * VPEs than we have "CPUs". So NCPUs arrays of NCPUs elements
42 * is always going to be overkill, but always going to be enough.
43 */
44
45unsigned long smtc_nexttime[NR_CPUS][NR_CPUS];
46static int smtc_nextinvpe[NR_CPUS];
47
48/*
49 * Timestamps stored are absolute values to be programmed
50 * into Count register. Valid timestamps will never be zero.
51 * If a Zero Count value is actually calculated, it is converted
52 * to be a 1, which will introduce 1 or two CPU cycles of error
53 * roughly once every four billion events, which at 1000 HZ means
54 * about once every 50 days. If that's actually a problem, one
55 * could alternate squashing 0 to 1 and to -1.
56 */
57
58#define MAKEVALID(x) (((x) == 0L) ? 1L : (x))
59#define ISVALID(x) ((x) != 0L)
60
61/*
62 * Time comparison is subtle, as it's really truncated
63 * modular arithmetic.
64 */
65
66#define IS_SOONER(a, b, reference) \
67 (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference)))
68
69/*
70 * CATCHUP_INCREMENT, used when the function falls behind the counter.
71 * Could be an increasing function instead of a constant;
72 */
73
74#define CATCHUP_INCREMENT 64
75
76static int mips_next_event(unsigned long delta,
77 struct clock_event_device *evt)
78{
79 unsigned long flags;
80 unsigned int mtflags;
81 unsigned long timestamp, reference, previous;
82 unsigned long nextcomp = 0L;
83 int vpe = current_cpu_data.vpe_id;
84 int cpu = smp_processor_id();
85 local_irq_save(flags);
86 mtflags = dmt();
87
88 /*
89 * Maintain the per-TC virtual timer
90 * and program the per-VPE shared Count register
91 * as appropriate here...
92 */
93 reference = (unsigned long)read_c0_count();
94 timestamp = MAKEVALID(reference + delta);
95 /*
96 * To really model the clock, we have to catch the case
97 * where the current next-in-VPE timestamp is the old
98 * timestamp for the calling CPE, but the new value is
99 * in fact later. In that case, we have to do a full
100 * scan and discover the new next-in-VPE CPU id and
101 * timestamp.
102 */
103 previous = smtc_nexttime[vpe][cpu];
104 if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous)
105 && IS_SOONER(previous, timestamp, reference)) {
106 int i;
107 int soonest = cpu;
108
109 /*
110 * Update timestamp array here, so that new
111 * value gets considered along with those of
112 * other virtual CPUs on the VPE.
113 */
114 smtc_nexttime[vpe][cpu] = timestamp;
115 for_each_online_cpu(i) {
116 if (ISVALID(smtc_nexttime[vpe][i])
117 && IS_SOONER(smtc_nexttime[vpe][i],
118 smtc_nexttime[vpe][soonest], reference)) {
119 soonest = i;
120 }
121 }
122 smtc_nextinvpe[vpe] = soonest;
123 nextcomp = smtc_nexttime[vpe][soonest];
124 /*
125 * Otherwise, we don't have to process the whole array rank,
126 * we just have to see if the event horizon has gotten closer.
127 */
128 } else {
129 if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) ||
130 IS_SOONER(timestamp,
131 smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) {
132 smtc_nextinvpe[vpe] = cpu;
133 nextcomp = timestamp;
134 }
135 /*
136 * Since next-in-VPE may me the same as the executing
137 * virtual CPU, we update the array *after* checking
138 * its value.
139 */
140 smtc_nexttime[vpe][cpu] = timestamp;
141 }
142
143 /*
144 * It may be that, in fact, we don't need to update Compare,
145 * but if we do, we want to make sure we didn't fall into
146 * a crack just behind Count.
147 */
148 if (ISVALID(nextcomp)) {
149 write_c0_compare(nextcomp);
150 ehb();
151 /*
152 * We never return an error, we just make sure
153 * that we trigger the handlers as quickly as
154 * we can if we fell behind.
155 */
156 while ((nextcomp - (unsigned long)read_c0_count())
157 > (unsigned long)LONG_MAX) {
158 nextcomp += CATCHUP_INCREMENT;
159 write_c0_compare(nextcomp);
160 ehb();
161 }
162 }
163 emt(mtflags);
164 local_irq_restore(flags);
165 return 0;
166}
167
168
169void smtc_distribute_timer(int vpe)
170{
171 unsigned long flags;
172 unsigned int mtflags;
173 int cpu;
174 struct clock_event_device *cd;
175 unsigned long nextstamp = 0L;
176 unsigned long reference;
177
178
179repeat:
180 for_each_online_cpu(cpu) {
181 /*
182 * Find virtual CPUs within the current VPE who have
183 * unserviced timer requests whose time is now past.
184 */
185 local_irq_save(flags);
186 mtflags = dmt();
187 if (cpu_data[cpu].vpe_id == vpe &&
188 ISVALID(smtc_nexttime[vpe][cpu])) {
189 reference = (unsigned long)read_c0_count();
190 if ((smtc_nexttime[vpe][cpu] - reference)
191 > (unsigned long)LONG_MAX) {
192 smtc_nexttime[vpe][cpu] = 0L;
193 emt(mtflags);
194 local_irq_restore(flags);
195 /*
196 * We don't send IPIs to ourself.
197 */
198 if (cpu != smp_processor_id()) {
199 smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
200 } else {
201 cd = &per_cpu(mips_clockevent_device, cpu);
202 cd->event_handler(cd);
203 }
204 } else {
205 /* Local to VPE but Valid Time not yet reached. */
206 if (!ISVALID(nextstamp) ||
207 IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp,
208 reference)) {
209 smtc_nextinvpe[vpe] = cpu;
210 nextstamp = smtc_nexttime[vpe][cpu];
211 }
212 emt(mtflags);
213 local_irq_restore(flags);
214 }
215 } else {
216 emt(mtflags);
217 local_irq_restore(flags);
218
219 }
220 }
221 /* Reprogram for interrupt at next soonest timestamp for VPE */
222 if (ISVALID(nextstamp)) {
223 write_c0_compare(nextstamp);
224 ehb();
225 if ((nextstamp - (unsigned long)read_c0_count())
226 > (unsigned long)LONG_MAX)
227 goto repeat;
228 }
229}
230
231
232irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
233{
234 int cpu = smp_processor_id();
235
236 /* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */
237 handle_perf_irq(1);
238
239 if (read_c0_cause() & (1 << 30)) {
240 /* Clear Count/Compare Interrupt */
241 write_c0_compare(read_c0_compare());
242 smtc_distribute_timer(cpu_data[cpu].vpe_id);
243 }
244 return IRQ_HANDLED;
245}
246
247
248int __cpuinit mips_clockevent_init(void)
249{
250 uint64_t mips_freq = mips_hpt_frequency;
251 unsigned int cpu = smp_processor_id();
252 struct clock_event_device *cd;
253 unsigned int irq;
254 int i;
255 int j;
256
257 if (!cpu_has_counter || !mips_hpt_frequency)
258 return -ENXIO;
259 if (cpu == 0) {
260 for (i = 0; i < num_possible_cpus(); i++) {
261 smtc_nextinvpe[i] = 0;
262 for (j = 0; j < num_possible_cpus(); j++)
263 smtc_nexttime[i][j] = 0L;
264 }
265 /*
266 * SMTC also can't have the usablility test
267 * run by secondary TCs once Compare is in use.
268 */
269 if (!c0_compare_int_usable())
270 return -ENXIO;
271 }
272
273 /*
274 * With vectored interrupts things are getting platform specific.
275 * get_c0_compare_int is a hook to allow a platform to return the
276 * interrupt number of it's liking.
277 */
278 irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
279 if (get_c0_compare_int)
280 irq = get_c0_compare_int();
281
282 cd = &per_cpu(mips_clockevent_device, cpu);
283
284 cd->name = "MIPS";
285 cd->features = CLOCK_EVT_FEAT_ONESHOT;
286
287 /* Calculate the min / max delta */
288 cd->mult = div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
289 cd->shift = 32;
290 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
291 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
292
293 cd->rating = 300;
294 cd->irq = irq;
295 cd->cpumask = cpumask_of_cpu(cpu);
296 cd->set_next_event = mips_next_event;
297 cd->set_mode = mips_set_clock_mode;
298 cd->event_handler = mips_event_handler;
299
300 clockevents_register_device(cd);
301
302 /*
303 * On SMTC we only want to do the data structure
304 * initialization and IRQ setup once.
305 */
306 if (cpu)
307 return 0;
308 /*
309 * And we need the hwmask associated with the c0_compare
310 * vector to be initialized.
311 */
312 irq_hwmask[irq] = (0x100 << cp0_compare_irq);
313 if (cp0_timer_irq_installed)
314 return 0;
315
316 cp0_timer_irq_installed = 1;
317
318 setup_irq(irq, &c0_compare_irqaction);
319
320 return 0;
321}
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 335a6ae3d594..e621fda8ab37 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -45,18 +45,7 @@ static void r39xx_wait(void)
45 local_irq_enable(); 45 local_irq_enable();
46} 46}
47 47
48/* 48extern void r4k_wait(void);
49 * There is a race when WAIT instruction executed with interrupt
50 * enabled.
51 * But it is implementation-dependent wheter the pipelie restarts when
52 * a non-enabled interrupt is requested.
53 */
54static void r4k_wait(void)
55{
56 __asm__(" .set mips3 \n"
57 " wait \n"
58 " .set mips0 \n");
59}
60 49
61/* 50/*
62 * This variant is preferable as it allows testing need_resched and going to 51 * This variant is preferable as it allows testing need_resched and going to
@@ -65,14 +54,18 @@ static void r4k_wait(void)
65 * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes 54 * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes
66 * using this version a gamble. 55 * using this version a gamble.
67 */ 56 */
68static void r4k_wait_irqoff(void) 57void r4k_wait_irqoff(void)
69{ 58{
70 local_irq_disable(); 59 local_irq_disable();
71 if (!need_resched()) 60 if (!need_resched())
72 __asm__(" .set mips3 \n" 61 __asm__(" .set push \n"
62 " .set mips3 \n"
73 " wait \n" 63 " wait \n"
74 " .set mips0 \n"); 64 " .set pop \n");
75 local_irq_enable(); 65 local_irq_enable();
66 __asm__(" .globl __pastwait \n"
67 "__pastwait: \n");
68 return;
76} 69}
77 70
78/* 71/*
@@ -128,7 +121,7 @@ static int __init wait_disable(char *s)
128 121
129__setup("nowait", wait_disable); 122__setup("nowait", wait_disable);
130 123
131static inline void check_wait(void) 124void __init check_wait(void)
132{ 125{
133 struct cpuinfo_mips *c = &current_cpu_data; 126 struct cpuinfo_mips *c = &current_cpu_data;
134 127
@@ -242,7 +235,6 @@ static inline void check_errata(void)
242 235
243void __init check_bugs32(void) 236void __init check_bugs32(void)
244{ 237{
245 check_wait();
246 check_errata(); 238 check_errata();
247} 239}
248 240
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index e29598ae939d..ffa331029e08 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -79,11 +79,6 @@ FEXPORT(syscall_exit)
79 79
80FEXPORT(restore_all) # restore full frame 80FEXPORT(restore_all) # restore full frame
81#ifdef CONFIG_MIPS_MT_SMTC 81#ifdef CONFIG_MIPS_MT_SMTC
82/* Detect and execute deferred IPI "interrupts" */
83 LONG_L s0, TI_REGS($28)
84 LONG_S sp, TI_REGS($28)
85 jal deferred_smtc_ipi
86 LONG_S s0, TI_REGS($28)
87#ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP 82#ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
88/* Re-arm any temporarily masked interrupts not explicitly "acked" */ 83/* Re-arm any temporarily masked interrupts not explicitly "acked" */
89 mfc0 v0, CP0_TCSTATUS 84 mfc0 v0, CP0_TCSTATUS
@@ -112,6 +107,11 @@ FEXPORT(restore_all) # restore full frame
112 xor t0, t0, t3 107 xor t0, t0, t3
113 mtc0 t0, CP0_TCCONTEXT 108 mtc0 t0, CP0_TCCONTEXT
114#endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */ 109#endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
110/* Detect and execute deferred IPI "interrupts" */
111 LONG_L s0, TI_REGS($28)
112 LONG_S sp, TI_REGS($28)
113 jal deferred_smtc_ipi
114 LONG_S s0, TI_REGS($28)
115#endif /* CONFIG_MIPS_MT_SMTC */ 115#endif /* CONFIG_MIPS_MT_SMTC */
116 .set noat 116 .set noat
117 RESTORE_TEMP 117 RESTORE_TEMP
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index c6ada98ee042..01dcbe38fa01 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -20,6 +20,7 @@
20#include <asm/stackframe.h> 20#include <asm/stackframe.h>
21#include <asm/war.h> 21#include <asm/war.h>
22#include <asm/page.h> 22#include <asm/page.h>
23#include <asm/thread_info.h>
23 24
24#define PANIC_PIC(msg) \ 25#define PANIC_PIC(msg) \
25 .set push; \ 26 .set push; \
@@ -126,7 +127,42 @@ handle_vcei:
126 127
127 __FINIT 128 __FINIT
128 129
130 .align 5 /* 32 byte rollback region */
131LEAF(r4k_wait)
132 .set push
133 .set noreorder
134 /* start of rollback region */
135 LONG_L t0, TI_FLAGS($28)
136 nop
137 andi t0, _TIF_NEED_RESCHED
138 bnez t0, 1f
139 nop
140 nop
141 nop
142 .set mips3
143 wait
144 /* end of rollback region (the region size must be power of two) */
145 .set pop
1461:
147 jr ra
148 END(r4k_wait)
149
150 .macro BUILD_ROLLBACK_PROLOGUE handler
151 FEXPORT(rollback_\handler)
152 .set push
153 .set noat
154 MFC0 k0, CP0_EPC
155 PTR_LA k1, r4k_wait
156 ori k0, 0x1f /* 32 byte rollback region */
157 xori k0, 0x1f
158 bne k0, k1, 9f
159 MTC0 k0, CP0_EPC
1609:
161 .set pop
162 .endm
163
129 .align 5 164 .align 5
165BUILD_ROLLBACK_PROLOGUE handle_int
130NESTED(handle_int, PT_SIZE, sp) 166NESTED(handle_int, PT_SIZE, sp)
131#ifdef CONFIG_TRACE_IRQFLAGS 167#ifdef CONFIG_TRACE_IRQFLAGS
132 /* 168 /*
@@ -201,6 +237,7 @@ NESTED(except_vec_ejtag_debug, 0, sp)
201 * This prototype is copied to ebase + n*IntCtl.VS and patched 237 * This prototype is copied to ebase + n*IntCtl.VS and patched
202 * to invoke the handler 238 * to invoke the handler
203 */ 239 */
240BUILD_ROLLBACK_PROLOGUE except_vec_vi
204NESTED(except_vec_vi, 0, sp) 241NESTED(except_vec_vi, 0, sp)
205 SAVE_SOME 242 SAVE_SOME
206 SAVE_AT 243 SAVE_AT
@@ -245,8 +282,8 @@ NESTED(except_vec_vi_handler, 0, sp)
245 and t0, a0, t1 282 and t0, a0, t1
246#ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP 283#ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
247 mfc0 t2, CP0_TCCONTEXT 284 mfc0 t2, CP0_TCCONTEXT
248 or t0, t0, t2 285 or t2, t0, t2
249 mtc0 t0, CP0_TCCONTEXT 286 mtc0 t2, CP0_TCCONTEXT
250#endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */ 287#endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
251 xor t1, t1, t0 288 xor t1, t1, t0
252 mtc0 t1, CP0_STATUS 289 mtc0 t1, CP0_STATUS
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 361364501d34..492a0a8d70fb 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -22,6 +22,7 @@
22#include <asm/irqflags.h> 22#include <asm/irqflags.h>
23#include <asm/regdef.h> 23#include <asm/regdef.h>
24#include <asm/page.h> 24#include <asm/page.h>
25#include <asm/pgtable-bits.h>
25#include <asm/mipsregs.h> 26#include <asm/mipsregs.h>
26#include <asm/stackframe.h> 27#include <asm/stackframe.h>
27 28
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 8f6d58ede33c..6e152c80cd4a 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -236,8 +236,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
236 236
237 atomic_set(&kgdb_cpu_doing_single_step, -1); 237 atomic_set(&kgdb_cpu_doing_single_step, -1);
238 if (remcom_in_buffer[0] == 's') 238 if (remcom_in_buffer[0] == 's')
239 if (kgdb_contthread) 239 atomic_set(&kgdb_cpu_doing_single_step, cpu);
240 atomic_set(&kgdb_cpu_doing_single_step, cpu);
241 240
242 return 0; 241 return 0;
243 } 242 }
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index df4d3f2f740c..dc9eb72ed9de 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -159,7 +159,7 @@ __setup("fpaff=", fpaff_thresh);
159/* 159/*
160 * FPU Use Factor empirically derived from experiments on 34K 160 * FPU Use Factor empirically derived from experiments on 34K
161 */ 161 */
162#define FPUSEFACTOR 333 162#define FPUSEFACTOR 2000
163 163
164static __init int mt_fp_affinity_init(void) 164static __init int mt_fp_affinity_init(void)
165{ 165{
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index b16facd9ea8e..22fc19bbe87f 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -55,7 +55,7 @@ void __noreturn cpu_idle(void)
55 while (1) { 55 while (1) {
56 tick_nohz_stop_sched_tick(1); 56 tick_nohz_stop_sched_tick(1);
57 while (!need_resched()) { 57 while (!need_resched()) {
58#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG 58#ifdef CONFIG_MIPS_MT_SMTC
59 extern void smtc_idle_loop_hook(void); 59 extern void smtc_idle_loop_hook(void);
60 60
61 smtc_idle_loop_hook(); 61 smtc_idle_loop_hook();
@@ -145,17 +145,18 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
145 */ 145 */
146 p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1); 146 p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
147 childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); 147 childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
148 clear_tsk_thread_flag(p, TIF_USEDFPU);
149 148
150#ifdef CONFIG_MIPS_MT_FPAFF 149#ifdef CONFIG_MIPS_MT_SMTC
151 /* 150 /*
152 * FPU affinity support is cleaner if we track the 151 * SMTC restores TCStatus after Status, and the CU bits
153 * user-visible CPU affinity from the very beginning. 152 * are aliased there.
154 * The generic cpus_allowed mask will already have
155 * been copied from the parent before copy_thread
156 * is invoked.
157 */ 153 */
158 p->thread.user_cpus_allowed = p->cpus_allowed; 154 childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1);
155#endif
156 clear_tsk_thread_flag(p, TIF_USEDFPU);
157
158#ifdef CONFIG_MIPS_MT_FPAFF
159 clear_tsk_thread_flag(p, TIF_FPUBOUND);
159#endif /* CONFIG_MIPS_MT_FPAFF */ 160#endif /* CONFIG_MIPS_MT_FPAFF */
160 161
161 if (clone_flags & CLONE_SETTLS) 162 if (clone_flags & CLONE_SETTLS)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 35234b92b9a5..96ffc9c6d194 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -238,7 +238,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
238 case FPC_EIR: { /* implementation / version register */ 238 case FPC_EIR: { /* implementation / version register */
239 unsigned int flags; 239 unsigned int flags;
240#ifdef CONFIG_MIPS_MT_SMTC 240#ifdef CONFIG_MIPS_MT_SMTC
241 unsigned int irqflags; 241 unsigned long irqflags;
242 unsigned int mtflags; 242 unsigned int mtflags;
243#endif /* CONFIG_MIPS_MT_SMTC */ 243#endif /* CONFIG_MIPS_MT_SMTC */
244 244
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index a516286532ab..897fb2b4751c 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -1,4 +1,21 @@
1/* Copyright (C) 2004 Mips Technologies, Inc */ 1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version 2
5 * of the License, or (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2004 Mips Technologies, Inc
17 * Copyright (C) 2008 Kevin D. Kissell
18 */
2 19
3#include <linux/clockchips.h> 20#include <linux/clockchips.h>
4#include <linux/kernel.h> 21#include <linux/kernel.h>
@@ -21,7 +38,6 @@
21#include <asm/time.h> 38#include <asm/time.h>
22#include <asm/addrspace.h> 39#include <asm/addrspace.h>
23#include <asm/smtc.h> 40#include <asm/smtc.h>
24#include <asm/smtc_ipi.h>
25#include <asm/smtc_proc.h> 41#include <asm/smtc_proc.h>
26 42
27/* 43/*
@@ -58,11 +74,6 @@ unsigned long irq_hwmask[NR_IRQS];
58 74
59asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS]; 75asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
60 76
61/*
62 * Clock interrupt "latch" buffers, per "CPU"
63 */
64
65static atomic_t ipi_timer_latch[NR_CPUS];
66 77
67/* 78/*
68 * Number of InterProcessor Interrupt (IPI) message buffers to allocate 79 * Number of InterProcessor Interrupt (IPI) message buffers to allocate
@@ -70,7 +81,7 @@ static atomic_t ipi_timer_latch[NR_CPUS];
70 81
71#define IPIBUF_PER_CPU 4 82#define IPIBUF_PER_CPU 4
72 83
73static struct smtc_ipi_q IPIQ[NR_CPUS]; 84struct smtc_ipi_q IPIQ[NR_CPUS];
74static struct smtc_ipi_q freeIPIq; 85static struct smtc_ipi_q freeIPIq;
75 86
76 87
@@ -282,7 +293,7 @@ static void smtc_configure_tlb(void)
282 * phys_cpu_present_map and the logical/physical mappings. 293 * phys_cpu_present_map and the logical/physical mappings.
283 */ 294 */
284 295
285int __init mipsmt_build_cpu_map(int start_cpu_slot) 296int __init smtc_build_cpu_map(int start_cpu_slot)
286{ 297{
287 int i, ntcs; 298 int i, ntcs;
288 299
@@ -325,7 +336,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
325 write_tc_c0_tcstatus((read_tc_c0_tcstatus() 336 write_tc_c0_tcstatus((read_tc_c0_tcstatus()
326 & ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT)) 337 & ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT))
327 | TCSTATUS_A); 338 | TCSTATUS_A);
328 write_tc_c0_tccontext(0); 339 /*
340 * TCContext gets an offset from the base of the IPIQ array
341 * to be used in low-level code to detect the presence of
342 * an active IPI queue
343 */
344 write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
329 /* Bind tc to vpe */ 345 /* Bind tc to vpe */
330 write_tc_c0_tcbind(vpe); 346 write_tc_c0_tcbind(vpe);
331 /* In general, all TCs should have the same cpu_data indications */ 347 /* In general, all TCs should have the same cpu_data indications */
@@ -336,10 +352,18 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
336 cpu_data[cpu].options &= ~MIPS_CPU_FPU; 352 cpu_data[cpu].options &= ~MIPS_CPU_FPU;
337 cpu_data[cpu].vpe_id = vpe; 353 cpu_data[cpu].vpe_id = vpe;
338 cpu_data[cpu].tc_id = tc; 354 cpu_data[cpu].tc_id = tc;
355 /* Multi-core SMTC hasn't been tested, but be prepared */
356 cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff;
339} 357}
340 358
359/*
360 * Tweak to get Count registes in as close a sync as possible.
361 * Value seems good for 34K-class cores.
362 */
363
364#define CP0_SKEW 8
341 365
342void mipsmt_prepare_cpus(void) 366void smtc_prepare_cpus(int cpus)
343{ 367{
344 int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu; 368 int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu;
345 unsigned long flags; 369 unsigned long flags;
@@ -363,13 +387,13 @@ void mipsmt_prepare_cpus(void)
363 IPIQ[i].head = IPIQ[i].tail = NULL; 387 IPIQ[i].head = IPIQ[i].tail = NULL;
364 spin_lock_init(&IPIQ[i].lock); 388 spin_lock_init(&IPIQ[i].lock);
365 IPIQ[i].depth = 0; 389 IPIQ[i].depth = 0;
366 atomic_set(&ipi_timer_latch[i], 0);
367 } 390 }
368 391
369 /* cpu_data index starts at zero */ 392 /* cpu_data index starts at zero */
370 cpu = 0; 393 cpu = 0;
371 cpu_data[cpu].vpe_id = 0; 394 cpu_data[cpu].vpe_id = 0;
372 cpu_data[cpu].tc_id = 0; 395 cpu_data[cpu].tc_id = 0;
396 cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff;
373 cpu++; 397 cpu++;
374 398
375 /* Report on boot-time options */ 399 /* Report on boot-time options */
@@ -484,7 +508,8 @@ void mipsmt_prepare_cpus(void)
484 write_vpe_c0_compare(0); 508 write_vpe_c0_compare(0);
485 /* Propagate Config7 */ 509 /* Propagate Config7 */
486 write_vpe_c0_config7(read_c0_config7()); 510 write_vpe_c0_config7(read_c0_config7());
487 write_vpe_c0_count(read_c0_count()); 511 write_vpe_c0_count(read_c0_count() + CP0_SKEW);
512 ehb();
488 } 513 }
489 /* enable multi-threading within VPE */ 514 /* enable multi-threading within VPE */
490 write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE); 515 write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE);
@@ -556,7 +581,7 @@ void mipsmt_prepare_cpus(void)
556void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle) 581void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
557{ 582{
558 extern u32 kernelsp[NR_CPUS]; 583 extern u32 kernelsp[NR_CPUS];
559 long flags; 584 unsigned long flags;
560 int mtflags; 585 int mtflags;
561 586
562 LOCK_MT_PRA(); 587 LOCK_MT_PRA();
@@ -585,24 +610,22 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
585 610
586void smtc_init_secondary(void) 611void smtc_init_secondary(void)
587{ 612{
588 /*
589 * Start timer on secondary VPEs if necessary.
590 * plat_timer_setup has already have been invoked by init/main
591 * on "boot" TC. Like per_cpu_trap_init() hack, this assumes that
592 * SMTC init code assigns TCs consdecutively and in ascending order
593 * to across available VPEs.
594 */
595 if (((read_c0_tcbind() & TCBIND_CURTC) != 0) &&
596 ((read_c0_tcbind() & TCBIND_CURVPE)
597 != cpu_data[smp_processor_id() - 1].vpe_id)){
598 write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
599 }
600
601 local_irq_enable(); 613 local_irq_enable();
602} 614}
603 615
604void smtc_smp_finish(void) 616void smtc_smp_finish(void)
605{ 617{
618 int cpu = smp_processor_id();
619
620 /*
621 * Lowest-numbered CPU per VPE starts a clock tick.
622 * Like per_cpu_trap_init() hack, this assumes that
623 * SMTC init code assigns TCs consdecutively and
624 * in ascending order across available VPEs.
625 */
626 if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
627 write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
628
606 printk("TC %d going on-line as CPU %d\n", 629 printk("TC %d going on-line as CPU %d\n",
607 cpu_data[smp_processor_id()].tc_id, smp_processor_id()); 630 cpu_data[smp_processor_id()].tc_id, smp_processor_id());
608} 631}
@@ -753,8 +776,10 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
753{ 776{
754 int tcstatus; 777 int tcstatus;
755 struct smtc_ipi *pipi; 778 struct smtc_ipi *pipi;
756 long flags; 779 unsigned long flags;
757 int mtflags; 780 int mtflags;
781 unsigned long tcrestart;
782 extern void r4k_wait_irqoff(void), __pastwait(void);
758 783
759 if (cpu == smp_processor_id()) { 784 if (cpu == smp_processor_id()) {
760 printk("Cannot Send IPI to self!\n"); 785 printk("Cannot Send IPI to self!\n");
@@ -771,8 +796,6 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
771 pipi->arg = (void *)action; 796 pipi->arg = (void *)action;
772 pipi->dest = cpu; 797 pipi->dest = cpu;
773 if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) { 798 if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
774 if (type == SMTC_CLOCK_TICK)
775 atomic_inc(&ipi_timer_latch[cpu]);
776 /* If not on same VPE, enqueue and send cross-VPE interrupt */ 799 /* If not on same VPE, enqueue and send cross-VPE interrupt */
777 smtc_ipi_nq(&IPIQ[cpu], pipi); 800 smtc_ipi_nq(&IPIQ[cpu], pipi);
778 LOCK_CORE_PRA(); 801 LOCK_CORE_PRA();
@@ -800,22 +823,29 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
800 823
801 if ((tcstatus & TCSTATUS_IXMT) != 0) { 824 if ((tcstatus & TCSTATUS_IXMT) != 0) {
802 /* 825 /*
803 * Spin-waiting here can deadlock, 826 * If we're in the the irq-off version of the wait
804 * so we queue the message for the target TC. 827 * loop, we need to force exit from the wait and
828 * do a direct post of the IPI.
829 */
830 if (cpu_wait == r4k_wait_irqoff) {
831 tcrestart = read_tc_c0_tcrestart();
832 if (tcrestart >= (unsigned long)r4k_wait_irqoff
833 && tcrestart < (unsigned long)__pastwait) {
834 write_tc_c0_tcrestart(__pastwait);
835 tcstatus &= ~TCSTATUS_IXMT;
836 write_tc_c0_tcstatus(tcstatus);
837 goto postdirect;
838 }
839 }
840 /*
841 * Otherwise we queue the message for the target TC
842 * to pick up when he does a local_irq_restore()
805 */ 843 */
806 write_tc_c0_tchalt(0); 844 write_tc_c0_tchalt(0);
807 UNLOCK_CORE_PRA(); 845 UNLOCK_CORE_PRA();
808 /* Try to reduce redundant timer interrupt messages */
809 if (type == SMTC_CLOCK_TICK) {
810 if (atomic_postincrement(&ipi_timer_latch[cpu])!=0){
811 smtc_ipi_nq(&freeIPIq, pipi);
812 return;
813 }
814 }
815 smtc_ipi_nq(&IPIQ[cpu], pipi); 846 smtc_ipi_nq(&IPIQ[cpu], pipi);
816 } else { 847 } else {
817 if (type == SMTC_CLOCK_TICK) 848postdirect:
818 atomic_inc(&ipi_timer_latch[cpu]);
819 post_direct_ipi(cpu, pipi); 849 post_direct_ipi(cpu, pipi);
820 write_tc_c0_tchalt(0); 850 write_tc_c0_tchalt(0);
821 UNLOCK_CORE_PRA(); 851 UNLOCK_CORE_PRA();
@@ -883,7 +913,7 @@ static void ipi_call_interrupt(void)
883 smp_call_function_interrupt(); 913 smp_call_function_interrupt();
884} 914}
885 915
886DECLARE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device); 916DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
887 917
888void ipi_decode(struct smtc_ipi *pipi) 918void ipi_decode(struct smtc_ipi *pipi)
889{ 919{
@@ -891,20 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi)
891 struct clock_event_device *cd; 921 struct clock_event_device *cd;
892 void *arg_copy = pipi->arg; 922 void *arg_copy = pipi->arg;
893 int type_copy = pipi->type; 923 int type_copy = pipi->type;
894 int ticks;
895
896 smtc_ipi_nq(&freeIPIq, pipi); 924 smtc_ipi_nq(&freeIPIq, pipi);
897 switch (type_copy) { 925 switch (type_copy) {
898 case SMTC_CLOCK_TICK: 926 case SMTC_CLOCK_TICK:
899 irq_enter(); 927 irq_enter();
900 kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++; 928 kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++;
901 cd = &per_cpu(smtc_dummy_clockevent_device, cpu); 929 cd = &per_cpu(mips_clockevent_device, cpu);
902 ticks = atomic_read(&ipi_timer_latch[cpu]); 930 cd->event_handler(cd);
903 atomic_sub(ticks, &ipi_timer_latch[cpu]);
904 while (ticks) {
905 cd->event_handler(cd);
906 ticks--;
907 }
908 irq_exit(); 931 irq_exit();
909 break; 932 break;
910 933
@@ -937,24 +960,48 @@ void ipi_decode(struct smtc_ipi *pipi)
937 } 960 }
938} 961}
939 962
963/*
964 * Similar to smtc_ipi_replay(), but invoked from context restore,
965 * so it reuses the current exception frame rather than set up a
966 * new one with self_ipi.
967 */
968
940void deferred_smtc_ipi(void) 969void deferred_smtc_ipi(void)
941{ 970{
942 struct smtc_ipi *pipi; 971 int cpu = smp_processor_id();
943 unsigned long flags;
944/* DEBUG */
945 int q = smp_processor_id();
946 972
947 /* 973 /*
948 * Test is not atomic, but much faster than a dequeue, 974 * Test is not atomic, but much faster than a dequeue,
949 * and the vast majority of invocations will have a null queue. 975 * and the vast majority of invocations will have a null queue.
976 * If irq_disabled when this was called, then any IPIs queued
977 * after we test last will be taken on the next irq_enable/restore.
978 * If interrupts were enabled, then any IPIs added after the
979 * last test will be taken directly.
950 */ 980 */
951 if (IPIQ[q].head != NULL) { 981
952 while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) { 982 while (IPIQ[cpu].head != NULL) {
953 /* ipi_decode() should be called with interrupts off */ 983 struct smtc_ipi_q *q = &IPIQ[cpu];
954 local_irq_save(flags); 984 struct smtc_ipi *pipi;
985 unsigned long flags;
986
987 /*
988 * It may be possible we'll come in with interrupts
989 * already enabled.
990 */
991 local_irq_save(flags);
992
993 spin_lock(&q->lock);
994 pipi = __smtc_ipi_dq(q);
995 spin_unlock(&q->lock);
996 if (pipi != NULL)
955 ipi_decode(pipi); 997 ipi_decode(pipi);
956 local_irq_restore(flags); 998 /*
957 } 999 * The use of the __raw_local restore isn't
1000 * as obviously necessary here as in smtc_ipi_replay(),
1001 * but it's more efficient, given that we're already
1002 * running down the IPI queue.
1003 */
1004 __raw_local_irq_restore(flags);
958 } 1005 }
959} 1006}
960 1007
@@ -975,7 +1022,7 @@ static irqreturn_t ipi_interrupt(int irq, void *dev_idm)
975 struct smtc_ipi *pipi; 1022 struct smtc_ipi *pipi;
976 unsigned long tcstatus; 1023 unsigned long tcstatus;
977 int sent; 1024 int sent;
978 long flags; 1025 unsigned long flags;
979 unsigned int mtflags; 1026 unsigned int mtflags;
980 unsigned int vpflags; 1027 unsigned int vpflags;
981 1028
@@ -1066,55 +1113,53 @@ static void setup_cross_vpe_interrupts(unsigned int nvpe)
1066 1113
1067/* 1114/*
1068 * SMTC-specific hacks invoked from elsewhere in the kernel. 1115 * SMTC-specific hacks invoked from elsewhere in the kernel.
1069 *
1070 * smtc_ipi_replay is called from raw_local_irq_restore which is only ever
1071 * called with interrupts disabled. We do rely on interrupts being disabled
1072 * here because using spin_lock_irqsave()/spin_unlock_irqrestore() would
1073 * result in a recursive call to raw_local_irq_restore().
1074 */ 1116 */
1075 1117
1076static void __smtc_ipi_replay(void) 1118 /*
1119 * smtc_ipi_replay is called from raw_local_irq_restore
1120 */
1121
1122void smtc_ipi_replay(void)
1077{ 1123{
1078 unsigned int cpu = smp_processor_id(); 1124 unsigned int cpu = smp_processor_id();
1079 1125
1080 /* 1126 /*
1081 * To the extent that we've ever turned interrupts off, 1127 * To the extent that we've ever turned interrupts off,
1082 * we may have accumulated deferred IPIs. This is subtle. 1128 * we may have accumulated deferred IPIs. This is subtle.
1083 * If we use the smtc_ipi_qdepth() macro, we'll get an
1084 * exact number - but we'll also disable interrupts
1085 * and create a window of failure where a new IPI gets
1086 * queued after we test the depth but before we re-enable
1087 * interrupts. So long as IXMT never gets set, however,
1088 * we should be OK: If we pick up something and dispatch 1129 * we should be OK: If we pick up something and dispatch
1089 * it here, that's great. If we see nothing, but concurrent 1130 * it here, that's great. If we see nothing, but concurrent
1090 * with this operation, another TC sends us an IPI, IXMT 1131 * with this operation, another TC sends us an IPI, IXMT
1091 * is clear, and we'll handle it as a real pseudo-interrupt 1132 * is clear, and we'll handle it as a real pseudo-interrupt
1092 * and not a pseudo-pseudo interrupt. 1133 * and not a pseudo-pseudo interrupt. The important thing
1134 * is to do the last check for queued message *after* the
1135 * re-enabling of interrupts.
1093 */ 1136 */
1094 if (IPIQ[cpu].depth > 0) { 1137 while (IPIQ[cpu].head != NULL) {
1095 while (1) { 1138 struct smtc_ipi_q *q = &IPIQ[cpu];
1096 struct smtc_ipi_q *q = &IPIQ[cpu]; 1139 struct smtc_ipi *pipi;
1097 struct smtc_ipi *pipi; 1140 unsigned long flags;
1098 extern void self_ipi(struct smtc_ipi *); 1141
1099 1142 /*
1100 spin_lock(&q->lock); 1143 * It's just possible we'll come in with interrupts
1101 pipi = __smtc_ipi_dq(q); 1144 * already enabled.
1102 spin_unlock(&q->lock); 1145 */
1103 if (!pipi) 1146 local_irq_save(flags);
1104 break; 1147
1148 spin_lock(&q->lock);
1149 pipi = __smtc_ipi_dq(q);
1150 spin_unlock(&q->lock);
1151 /*
1152 ** But use a raw restore here to avoid recursion.
1153 */
1154 __raw_local_irq_restore(flags);
1105 1155
1156 if (pipi) {
1106 self_ipi(pipi); 1157 self_ipi(pipi);
1107 smtc_cpu_stats[cpu].selfipis++; 1158 smtc_cpu_stats[cpu].selfipis++;
1108 } 1159 }
1109 } 1160 }
1110} 1161}
1111 1162
1112void smtc_ipi_replay(void)
1113{
1114 raw_local_irq_disable();
1115 __smtc_ipi_replay();
1116}
1117
1118EXPORT_SYMBOL(smtc_ipi_replay); 1163EXPORT_SYMBOL(smtc_ipi_replay);
1119 1164
1120void smtc_idle_loop_hook(void) 1165void smtc_idle_loop_hook(void)
@@ -1193,40 +1238,13 @@ void smtc_idle_loop_hook(void)
1193 } 1238 }
1194 } 1239 }
1195 1240
1196 /*
1197 * Now that we limit outstanding timer IPIs, check for hung TC
1198 */
1199 for (tc = 0; tc < NR_CPUS; tc++) {
1200 /* Don't check ourself - we'll dequeue IPIs just below */
1201 if ((tc != smp_processor_id()) &&
1202 atomic_read(&ipi_timer_latch[tc]) > timerq_limit) {
1203 if (clock_hang_reported[tc] == 0) {
1204 pdb_msg += sprintf(pdb_msg,
1205 "TC %d looks hung with timer latch at %d\n",
1206 tc, atomic_read(&ipi_timer_latch[tc]));
1207 clock_hang_reported[tc]++;
1208 }
1209 }
1210 }
1211 emt(mtflags); 1241 emt(mtflags);
1212 local_irq_restore(flags); 1242 local_irq_restore(flags);
1213 if (pdb_msg != &id_ho_db_msg[0]) 1243 if (pdb_msg != &id_ho_db_msg[0])
1214 printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg); 1244 printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg);
1215#endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */ 1245#endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
1216 1246
1217 /* 1247 smtc_ipi_replay();
1218 * Replay any accumulated deferred IPIs. If "Instant Replay"
1219 * is in use, there should never be any.
1220 */
1221#ifndef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
1222 {
1223 unsigned long flags;
1224
1225 local_irq_save(flags);
1226 __smtc_ipi_replay();
1227 local_irq_restore(flags);
1228 }
1229#endif /* CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY */
1230} 1248}
1231 1249
1232void smtc_soft_dump(void) 1250void smtc_soft_dump(void)
@@ -1242,10 +1260,6 @@ void smtc_soft_dump(void)
1242 printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis); 1260 printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
1243 } 1261 }
1244 smtc_ipi_qdump(); 1262 smtc_ipi_qdump();
1245 printk("Timer IPI Backlogs:\n");
1246 for (i=0; i < NR_CPUS; i++) {
1247 printk("%d: %d\n", i, atomic_read(&ipi_timer_latch[i]));
1248 }
1249 printk("%d Recoveries of \"stolen\" FPU\n", 1263 printk("%d Recoveries of \"stolen\" FPU\n",
1250 atomic_read(&smtc_fpu_recoveries)); 1264 atomic_read(&smtc_fpu_recoveries));
1251} 1265}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 6bee29097a56..b602ac6eb47d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -46,6 +46,9 @@
46#include <asm/types.h> 46#include <asm/types.h>
47#include <asm/stacktrace.h> 47#include <asm/stacktrace.h>
48 48
49extern void check_wait(void);
50extern asmlinkage void r4k_wait(void);
51extern asmlinkage void rollback_handle_int(void);
49extern asmlinkage void handle_int(void); 52extern asmlinkage void handle_int(void);
50extern asmlinkage void handle_tlbm(void); 53extern asmlinkage void handle_tlbm(void);
51extern asmlinkage void handle_tlbl(void); 54extern asmlinkage void handle_tlbl(void);
@@ -822,8 +825,10 @@ static void mt_ase_fp_affinity(void)
822 if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) { 825 if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) {
823 cpumask_t tmask; 826 cpumask_t tmask;
824 827
825 cpus_and(tmask, current->thread.user_cpus_allowed, 828 current->thread.user_cpus_allowed
826 mt_fpu_cpumask); 829 = current->cpus_allowed;
830 cpus_and(tmask, current->cpus_allowed,
831 mt_fpu_cpumask);
827 set_cpus_allowed(current, tmask); 832 set_cpus_allowed(current, tmask);
828 set_thread_flag(TIF_FPUBOUND); 833 set_thread_flag(TIF_FPUBOUND);
829 } 834 }
@@ -1251,6 +1256,9 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs)
1251 1256
1252 extern char except_vec_vi, except_vec_vi_lui; 1257 extern char except_vec_vi, except_vec_vi_lui;
1253 extern char except_vec_vi_ori, except_vec_vi_end; 1258 extern char except_vec_vi_ori, except_vec_vi_end;
1259 extern char rollback_except_vec_vi;
1260 char *vec_start = (cpu_wait == r4k_wait) ?
1261 &rollback_except_vec_vi : &except_vec_vi;
1254#ifdef CONFIG_MIPS_MT_SMTC 1262#ifdef CONFIG_MIPS_MT_SMTC
1255 /* 1263 /*
1256 * We need to provide the SMTC vectored interrupt handler 1264 * We need to provide the SMTC vectored interrupt handler
@@ -1258,11 +1266,11 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs)
1258 * Status.IM bit to be masked before going there. 1266 * Status.IM bit to be masked before going there.
1259 */ 1267 */
1260 extern char except_vec_vi_mori; 1268 extern char except_vec_vi_mori;
1261 const int mori_offset = &except_vec_vi_mori - &except_vec_vi; 1269 const int mori_offset = &except_vec_vi_mori - vec_start;
1262#endif /* CONFIG_MIPS_MT_SMTC */ 1270#endif /* CONFIG_MIPS_MT_SMTC */
1263 const int handler_len = &except_vec_vi_end - &except_vec_vi; 1271 const int handler_len = &except_vec_vi_end - vec_start;
1264 const int lui_offset = &except_vec_vi_lui - &except_vec_vi; 1272 const int lui_offset = &except_vec_vi_lui - vec_start;
1265 const int ori_offset = &except_vec_vi_ori - &except_vec_vi; 1273 const int ori_offset = &except_vec_vi_ori - vec_start;
1266 1274
1267 if (handler_len > VECTORSPACING) { 1275 if (handler_len > VECTORSPACING) {
1268 /* 1276 /*
@@ -1272,7 +1280,7 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs)
1272 panic("VECTORSPACING too small"); 1280 panic("VECTORSPACING too small");
1273 } 1281 }
1274 1282
1275 memcpy(b, &except_vec_vi, handler_len); 1283 memcpy(b, vec_start, handler_len);
1276#ifdef CONFIG_MIPS_MT_SMTC 1284#ifdef CONFIG_MIPS_MT_SMTC
1277 BUG_ON(n > 7); /* Vector index %d exceeds SMTC maximum. */ 1285 BUG_ON(n > 7); /* Vector index %d exceeds SMTC maximum. */
1278 1286
@@ -1554,6 +1562,10 @@ void __init trap_init(void)
1554 extern char except_vec3_generic, except_vec3_r4000; 1562 extern char except_vec3_generic, except_vec3_r4000;
1555 extern char except_vec4; 1563 extern char except_vec4;
1556 unsigned long i; 1564 unsigned long i;
1565 int rollback;
1566
1567 check_wait();
1568 rollback = (cpu_wait == r4k_wait);
1557 1569
1558#if defined(CONFIG_KGDB) 1570#if defined(CONFIG_KGDB)
1559 if (kgdb_early_setup) 1571 if (kgdb_early_setup)
@@ -1618,7 +1630,7 @@ void __init trap_init(void)
1618 if (board_be_init) 1630 if (board_be_init)
1619 board_be_init(); 1631 board_be_init();
1620 1632
1621 set_except_vector(0, handle_int); 1633 set_except_vector(0, rollback ? rollback_handle_int : handle_int);
1622 set_except_vector(1, handle_tlbm); 1634 set_except_vector(1, handle_tlbm);
1623 set_except_vector(2, handle_tlbl); 1635 set_except_vector(2, handle_tlbl);
1624 set_except_vector(3, handle_tlbs); 1636 set_except_vector(3, handle_tlbs);
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index b5470ceb418b..afb119f35682 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -36,6 +36,7 @@ SECTIONS
36 SCHED_TEXT 36 SCHED_TEXT
37 LOCK_TEXT 37 LOCK_TEXT
38 KPROBES_TEXT 38 KPROBES_TEXT
39 *(.text.*)
39 *(.fixup) 40 *(.fixup)
40 *(.gnu.warning) 41 *(.gnu.warning)
41 } :text = 0 42 } :text = 0
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 8d7784122c14..edac9892c51a 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -39,12 +39,14 @@
39#ifdef USE_DOUBLE 39#ifdef USE_DOUBLE
40 40
41#define LOAD ld 41#define LOAD ld
42#define LOAD32 lwu
42#define ADD daddu 43#define ADD daddu
43#define NBYTES 8 44#define NBYTES 8
44 45
45#else 46#else
46 47
47#define LOAD lw 48#define LOAD lw
49#define LOAD32 lw
48#define ADD addu 50#define ADD addu
49#define NBYTES 4 51#define NBYTES 4
50 52
@@ -60,6 +62,14 @@
60 ADD sum, v1; \ 62 ADD sum, v1; \
61 .set pop 63 .set pop
62 64
65#define ADDC32(sum,reg) \
66 .set push; \
67 .set noat; \
68 addu sum, reg; \
69 sltu v1, sum, reg; \
70 addu sum, v1; \
71 .set pop
72
63#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ 73#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
64 LOAD _t0, (offset + UNIT(0))(src); \ 74 LOAD _t0, (offset + UNIT(0))(src); \
65 LOAD _t1, (offset + UNIT(1))(src); \ 75 LOAD _t1, (offset + UNIT(1))(src); \
@@ -132,7 +142,7 @@ LEAF(csum_partial)
132 beqz t8, .Lqword_align 142 beqz t8, .Lqword_align
133 andi t8, src, 0x8 143 andi t8, src, 0x8
134 144
135 lw t0, 0x00(src) 145 LOAD32 t0, 0x00(src)
136 LONG_SUBU a1, a1, 0x4 146 LONG_SUBU a1, a1, 0x4
137 ADDC(sum, t0) 147 ADDC(sum, t0)
138 PTR_ADDU src, src, 0x4 148 PTR_ADDU src, src, 0x4
@@ -211,7 +221,7 @@ LEAF(csum_partial)
211 LONG_SRL t8, t8, 0x2 221 LONG_SRL t8, t8, 0x2
212 222
213.Lend_words: 223.Lend_words:
214 lw t0, (src) 224 LOAD32 t0, (src)
215 LONG_SUBU t8, t8, 0x1 225 LONG_SUBU t8, t8, 0x1
216 ADDC(sum, t0) 226 ADDC(sum, t0)
217 .set reorder /* DADDI_WAR */ 227 .set reorder /* DADDI_WAR */
@@ -230,6 +240,9 @@ LEAF(csum_partial)
230 /* Still a full word to go */ 240 /* Still a full word to go */
231 ulw t1, (src) 241 ulw t1, (src)
232 PTR_ADDIU src, 4 242 PTR_ADDIU src, 4
243#ifdef USE_DOUBLE
244 dsll t1, t1, 32 /* clear lower 32bit */
245#endif
233 ADDC(sum, t1) 246 ADDC(sum, t1)
234 247
2351: move t1, zero 2481: move t1, zero
@@ -280,7 +293,7 @@ LEAF(csum_partial)
2801: 2931:
281 .set reorder 294 .set reorder
282 /* Add the passed partial csum. */ 295 /* Add the passed partial csum. */
283 ADDC(sum, a2) 296 ADDC32(sum, a2)
284 jr ra 297 jr ra
285 .set noreorder 298 .set noreorder
286 END(csum_partial) 299 END(csum_partial)
@@ -681,7 +694,7 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
681 .set pop 694 .set pop
6821: 6951:
683 .set reorder 696 .set reorder
684 ADDC(sum, psum) 697 ADDC32(sum, psum)
685 jr ra 698 jr ra
686 .set noreorder 699 .set noreorder
687 700
diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index 3b7dd722c32a..cef2db8d2225 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile
@@ -15,6 +15,6 @@ obj-$(CONFIG_EARLY_PRINTK) += malta-console.o
15obj-$(CONFIG_PCI) += malta-pci.o 15obj-$(CONFIG_PCI) += malta-pci.o
16 16
17# FIXME FIXME FIXME 17# FIXME FIXME FIXME
18obj-$(CONFIG_MIPS_MT_SMTC) += malta_smtc.o 18obj-$(CONFIG_MIPS_MT_SMTC) += malta-smtc.o
19 19
20EXTRA_CFLAGS += -Werror 20EXTRA_CFLAGS += -Werror
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ea705e49454..f84a46a8ae6e 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -84,12 +84,17 @@ static void msmtc_cpus_done(void)
84 84
85static void __init msmtc_smp_setup(void) 85static void __init msmtc_smp_setup(void)
86{ 86{
87 mipsmt_build_cpu_map(0); 87 /*
88 * we won't get the definitive value until
89 * we've run smtc_prepare_cpus later, but
90 * we would appear to need an upper bound now.
91 */
92 smp_num_siblings = smtc_build_cpu_map(0);
88} 93}
89 94
90static void __init msmtc_prepare_cpus(unsigned int max_cpus) 95static void __init msmtc_prepare_cpus(unsigned int max_cpus)
91{ 96{
92 mipsmt_prepare_cpus(); 97 smtc_prepare_cpus(max_cpus);
93} 98}
94 99
95struct plat_smp_ops msmtc_smp_ops = { 100struct plat_smp_ops msmtc_smp_ops = {
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 15e01aec37fd..c8c32f417b6c 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_SOC_TX3927) += ops-tx3927.o
15obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o 15obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o
16obj-$(CONFIG_MARKEINS) += ops-emma2rh.o pci-emma2rh.o fixup-emma2rh.o 16obj-$(CONFIG_MARKEINS) += ops-emma2rh.o pci-emma2rh.o fixup-emma2rh.o
17obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o 17obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o
18obj-$(CONFIG_BCM47XX) += pci-bcm47xx.o
18 19
19# 20#
20# These are still pretty much in the old state, watch, go blind. 21# These are still pretty much in the old state, watch, go blind.
diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c
new file mode 100644
index 000000000000..bea9b6cdfdbf
--- /dev/null
+++ b/arch/mips/pci/pci-bcm47xx.c
@@ -0,0 +1,60 @@
1/*
2 * Copyright (C) 2008 Aurelien Jarno <aurelien@aurel32.net>
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
10 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
12 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
13 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
14 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
15 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
16 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
17 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
18 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25#include <linux/types.h>
26#include <linux/pci.h>
27#include <linux/ssb/ssb.h>
28
29int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
30{
31 return 0;
32}
33
34int pcibios_plat_dev_init(struct pci_dev *dev)
35{
36 int res;
37 u8 slot, pin;
38
39 res = ssb_pcibios_plat_dev_init(dev);
40 if (res < 0) {
41 printk(KERN_ALERT "PCI: Failed to init device %s\n",
42 pci_name(dev));
43 return res;
44 }
45
46 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
47 slot = PCI_SLOT(dev->devfn);
48 res = ssb_pcibios_map_irq(dev, slot, pin);
49
50 /* IRQ-0 and IRQ-1 are software interrupts. */
51 if (res < 2) {
52 printk(KERN_ALERT "PCI: Failed to map IRQ of device %s\n",
53 pci_name(dev));
54 return res;
55 }
56
57 dev->irq = res;
58 return 0;
59}
60
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index bd78368c82bf..f97ab1461012 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -143,25 +143,47 @@ int __cpuinit bridge_probe(nasid_t nasid, int widget_id, int masterwid)
143 */ 143 */
144int __devinit pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) 144int __devinit pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
145{ 145{
146 return 0;
147}
148
149/* Most MIPS systems have straight-forward swizzling needs. */
150static inline u8 bridge_swizzle(u8 pin, u8 slot)
151{
152 return (((pin - 1) + slot) % 4) + 1;
153}
154
155static inline struct pci_dev *bridge_root_dev(struct pci_dev *dev)
156{
157 while (dev->bus->parent) {
158 /* Move up the chain of bridges. */
159 dev = dev->bus->self;
160 }
161
162 return dev;
163}
164
165/* Do platform specific device initialization at pci_enable_device() time */
166int pcibios_plat_dev_init(struct pci_dev *dev)
167{
146 struct bridge_controller *bc = BRIDGE_CONTROLLER(dev->bus); 168 struct bridge_controller *bc = BRIDGE_CONTROLLER(dev->bus);
147 int irq = bc->pci_int[slot]; 169 struct pci_dev *rdev = bridge_root_dev(dev);
170 int slot = PCI_SLOT(rdev->devfn);
171 int irq;
148 172
173 irq = bc->pci_int[slot];
149 if (irq == -1) { 174 if (irq == -1) {
150 irq = bc->pci_int[slot] = request_bridge_irq(bc); 175 irq = request_bridge_irq(bc);
151 if (irq < 0) 176 if (irq < 0)
152 panic("Can't allocate interrupt for PCI device %s\n", 177 return irq;
153 pci_name(dev)); 178
179 bc->pci_int[slot] = irq;
154 } 180 }
155 181
156 irq_to_bridge[irq] = bc; 182 irq_to_bridge[irq] = bc;
157 irq_to_slot[irq] = slot; 183 irq_to_slot[irq] = slot;
158 184
159 return irq; 185 dev->irq = irq;
160}
161 186
162/* Do platform specific device initialization at pci_enable_device() time */
163int pcibios_plat_dev_init(struct pci_dev *dev)
164{
165 return 0; 187 return 0;
166} 188}
167 189
diff --git a/arch/mips/sibyte/swarm/Makefile b/arch/mips/sibyte/swarm/Makefile
index f18ba9201bbc..7b45f199d92a 100644
--- a/arch/mips/sibyte/swarm/Makefile
+++ b/arch/mips/sibyte/swarm/Makefile
@@ -1,3 +1,4 @@
1obj-y := setup.o rtc_xicor1241.o rtc_m41t81.o 1obj-y := platform.o setup.o rtc_xicor1241.o \
2 rtc_m41t81.o
2 3
3obj-$(CONFIG_I2C_BOARDINFO) += swarm-i2c.o 4obj-$(CONFIG_I2C_BOARDINFO) += swarm-i2c.o
diff --git a/arch/mips/sibyte/swarm/platform.c b/arch/mips/sibyte/swarm/platform.c
new file mode 100644
index 000000000000..54847fe1e564
--- /dev/null
+++ b/arch/mips/sibyte/swarm/platform.c
@@ -0,0 +1,85 @@
1#include <linux/err.h>
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/io.h>
5#include <linux/platform_device.h>
6#include <linux/ata_platform.h>
7
8#include <asm/sibyte/board.h>
9#include <asm/sibyte/sb1250_genbus.h>
10#include <asm/sibyte/sb1250_regs.h>
11
12#if defined(CONFIG_SIBYTE_SWARM) || defined(CONFIG_SIBYTE_LITTLESUR)
13
14#define DRV_NAME "pata-swarm"
15
16#define SWARM_IDE_SHIFT 5
17#define SWARM_IDE_BASE 0x1f0
18#define SWARM_IDE_CTRL 0x3f6
19
20static struct resource swarm_pata_resource[] = {
21 {
22 .name = "Swarm GenBus IDE",
23 .flags = IORESOURCE_MEM,
24 }, {
25 .name = "Swarm GenBus IDE",
26 .flags = IORESOURCE_MEM,
27 }, {
28 .name = "Swarm GenBus IDE",
29 .flags = IORESOURCE_IRQ,
30 .start = K_INT_GB_IDE,
31 .end = K_INT_GB_IDE,
32 },
33};
34
35static struct pata_platform_info pata_platform_data = {
36 .ioport_shift = SWARM_IDE_SHIFT,
37};
38
39static struct platform_device swarm_pata_device = {
40 .name = "pata_platform",
41 .id = -1,
42 .resource = swarm_pata_resource,
43 .num_resources = ARRAY_SIZE(swarm_pata_resource),
44 .dev = {
45 .platform_data = &pata_platform_data,
46 .coherent_dma_mask = ~0, /* grumble */
47 },
48};
49
50static int __init swarm_pata_init(void)
51{
52 u8 __iomem *base;
53 phys_t offset, size;
54 struct resource *r;
55
56 if (!SIBYTE_HAVE_IDE)
57 return -ENODEV;
58
59 base = ioremap(A_IO_EXT_BASE, 0x800);
60 offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS));
61 size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS));
62 iounmap(base);
63
64 offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE;
65 size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE;
66 if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) {
67 pr_info(DRV_NAME ": PATA interface at GenBus disabled\n");
68
69 return -EBUSY;
70 }
71
72 pr_info(DRV_NAME ": PATA interface at GenBus slot %i\n", IDE_CS);
73
74 r = swarm_pata_resource;
75 r[0].start = offset + (SWARM_IDE_BASE << SWARM_IDE_SHIFT);
76 r[0].end = offset + ((SWARM_IDE_BASE + 8) << SWARM_IDE_SHIFT) - 1;
77 r[1].start = offset + (SWARM_IDE_CTRL << SWARM_IDE_SHIFT);
78 r[1].end = offset + ((SWARM_IDE_CTRL + 1) << SWARM_IDE_SHIFT) - 1;
79
80 return platform_device_register(&swarm_pata_device);
81}
82
83device_initcall(swarm_pata_init);
84
85#endif /* defined(CONFIG_SIBYTE_SWARM) || defined(CONFIG_SIBYTE_LITTLESUR) */
diff --git a/arch/mips/vr41xx/common/irq.c b/arch/mips/vr41xx/common/irq.c
index cba36a247e32..92dd1a0ca352 100644
--- a/arch/mips/vr41xx/common/irq.c
+++ b/arch/mips/vr41xx/common/irq.c
@@ -72,6 +72,7 @@ static void irq_dispatch(unsigned int irq)
72 cascade = irq_cascade + irq; 72 cascade = irq_cascade + irq;
73 if (cascade->get_irq != NULL) { 73 if (cascade->get_irq != NULL) {
74 unsigned int source_irq = irq; 74 unsigned int source_irq = irq;
75 int ret;
75 desc = irq_desc + source_irq; 76 desc = irq_desc + source_irq;
76 if (desc->chip->mask_ack) 77 if (desc->chip->mask_ack)
77 desc->chip->mask_ack(source_irq); 78 desc->chip->mask_ack(source_irq);
@@ -79,8 +80,9 @@ static void irq_dispatch(unsigned int irq)
79 desc->chip->mask(source_irq); 80 desc->chip->mask(source_irq);
80 desc->chip->ack(source_irq); 81 desc->chip->ack(source_irq);
81 } 82 }
82 irq = cascade->get_irq(irq); 83 ret = cascade->get_irq(irq);
83 if (irq < 0) 84 irq = ret;
85 if (ret < 0)
84 atomic_inc(&irq_err_count); 86 atomic_inc(&irq_err_count);
85 else 87 else
86 irq_dispatch(irq); 88 irq_dispatch(irq);
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index 761c434a2488..56c64ccc9c21 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -20,22 +20,8 @@ EXPORT_SYMBOL(__mn10300_irq_enabled_epsw);
20atomic_t irq_err_count; 20atomic_t irq_err_count;
21 21
22/* 22/*
23 * MN10300 INTC controller operations 23 * MN10300 interrupt controller operations
24 */ 24 */
25static void mn10300_cpupic_disable(unsigned int irq)
26{
27 u16 tmp = GxICR(irq);
28 GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_DETECT;
29 tmp = GxICR(irq);
30}
31
32static void mn10300_cpupic_enable(unsigned int irq)
33{
34 u16 tmp = GxICR(irq);
35 GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE;
36 tmp = GxICR(irq);
37}
38
39static void mn10300_cpupic_ack(unsigned int irq) 25static void mn10300_cpupic_ack(unsigned int irq)
40{ 26{
41 u16 tmp; 27 u16 tmp;
@@ -60,26 +46,54 @@ static void mn10300_cpupic_mask_ack(unsigned int irq)
60static void mn10300_cpupic_unmask(unsigned int irq) 46static void mn10300_cpupic_unmask(unsigned int irq)
61{ 47{
62 u16 tmp = GxICR(irq); 48 u16 tmp = GxICR(irq);
63 GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE | GxICR_DETECT; 49 GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE;
64 tmp = GxICR(irq); 50 tmp = GxICR(irq);
65} 51}
66 52
67static void mn10300_cpupic_end(unsigned int irq) 53static void mn10300_cpupic_unmask_clear(unsigned int irq)
68{ 54{
55 /* the MN10300 PIC latches its interrupt request bit, even after the
56 * device has ceased to assert its interrupt line and the interrupt
57 * channel has been disabled in the PIC, so for level-triggered
58 * interrupts we need to clear the request bit when we re-enable */
69 u16 tmp = GxICR(irq); 59 u16 tmp = GxICR(irq);
70 GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE; 60 GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE | GxICR_DETECT;
71 tmp = GxICR(irq); 61 tmp = GxICR(irq);
72} 62}
73 63
74static struct irq_chip mn10300_cpu_pic = { 64/*
75 .name = "cpu", 65 * MN10300 PIC level-triggered IRQ handling.
76 .disable = mn10300_cpupic_disable, 66 *
77 .enable = mn10300_cpupic_enable, 67 * The PIC has no 'ACK' function per se. It is possible to clear individual
68 * channel latches, but each latch relatches whether or not the channel is
69 * masked, so we need to clear the latch when we unmask the channel.
70 *
71 * Also for this reason, we don't supply an ack() op (it's unused anyway if
72 * mask_ack() is provided), and mask_ack() just masks.
73 */
74static struct irq_chip mn10300_cpu_pic_level = {
75 .name = "cpu_l",
76 .disable = mn10300_cpupic_mask,
77 .enable = mn10300_cpupic_unmask_clear,
78 .ack = NULL,
79 .mask = mn10300_cpupic_mask,
80 .mask_ack = mn10300_cpupic_mask,
81 .unmask = mn10300_cpupic_unmask_clear,
82};
83
84/*
85 * MN10300 PIC edge-triggered IRQ handling.
86 *
87 * We use the latch clearing function of the PIC as the 'ACK' function.
88 */
89static struct irq_chip mn10300_cpu_pic_edge = {
90 .name = "cpu_e",
91 .disable = mn10300_cpupic_mask,
92 .enable = mn10300_cpupic_unmask,
78 .ack = mn10300_cpupic_ack, 93 .ack = mn10300_cpupic_ack,
79 .mask = mn10300_cpupic_mask, 94 .mask = mn10300_cpupic_mask,
80 .mask_ack = mn10300_cpupic_mask_ack, 95 .mask_ack = mn10300_cpupic_mask_ack,
81 .unmask = mn10300_cpupic_unmask, 96 .unmask = mn10300_cpupic_unmask,
82 .end = mn10300_cpupic_end,
83}; 97};
84 98
85/* 99/*
@@ -114,7 +128,8 @@ void set_intr_level(int irq, u16 level)
114 */ 128 */
115void set_intr_postackable(int irq) 129void set_intr_postackable(int irq)
116{ 130{
117 set_irq_handler(irq, handle_level_irq); 131 set_irq_chip_and_handler(irq, &mn10300_cpu_pic_level,
132 handle_level_irq);
118} 133}
119 134
120/* 135/*
@@ -126,8 +141,12 @@ void __init init_IRQ(void)
126 141
127 for (irq = 0; irq < NR_IRQS; irq++) 142 for (irq = 0; irq < NR_IRQS; irq++)
128 if (irq_desc[irq].chip == &no_irq_type) 143 if (irq_desc[irq].chip == &no_irq_type)
129 set_irq_chip_and_handler(irq, &mn10300_cpu_pic, 144 /* due to the PIC latching interrupt requests, even
130 handle_edge_irq); 145 * when the IRQ is disabled, IRQ_PENDING is superfluous
146 * and we can use handle_level_irq() for edge-triggered
147 * interrupts */
148 set_irq_chip_and_handler(irq, &mn10300_cpu_pic_edge,
149 handle_level_irq);
131 unit_init_IRQ(); 150 unit_init_IRQ();
132} 151}
133 152
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index babb7c2ac377..e4606586f94c 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -1,6 +1,6 @@
1/* MN10300 Low level time management 1/* MN10300 Low level time management
2 * 2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2007-2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * - Derived from arch/i386/kernel/time.c 5 * - Derived from arch/i386/kernel/time.c
6 * 6 *
@@ -16,6 +16,7 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/smp.h> 17#include <linux/smp.h>
18#include <linux/profile.h> 18#include <linux/profile.h>
19#include <linux/cnt32_to_63.h>
19#include <asm/irq.h> 20#include <asm/irq.h>
20#include <asm/div64.h> 21#include <asm/div64.h>
21#include <asm/processor.h> 22#include <asm/processor.h>
@@ -40,27 +41,54 @@ static struct irqaction timer_irq = {
40 .name = "timer", 41 .name = "timer",
41}; 42};
42 43
44static unsigned long sched_clock_multiplier;
45
43/* 46/*
44 * scheduler clock - returns current time in nanosec units. 47 * scheduler clock - returns current time in nanosec units.
45 */ 48 */
46unsigned long long sched_clock(void) 49unsigned long long sched_clock(void)
47{ 50{
48 union { 51 union {
49 unsigned long long l; 52 unsigned long long ll;
50 u32 w[2]; 53 unsigned l[2];
51 } quot; 54 } tsc64, result;
55 unsigned long tsc, tmp;
56 unsigned product[3]; /* 96-bit intermediate value */
57
58 /* read the TSC value
59 */
60 tsc = 0 - get_cycles(); /* get_cycles() counts down */
52 61
53 quot.w[0] = mn10300_last_tsc - get_cycles(); 62 /* expand to 64-bits.
54 quot.w[1] = 1000000000; 63 * - sched_clock() must be called once a minute or better or the
64 * following will go horribly wrong - see cnt32_to_63()
65 */
66 tsc64.ll = cnt32_to_63(tsc) & 0x7fffffffffffffffULL;
55 67
56 asm("mulu %2,%3,%0,%1" 68 /* scale the 64-bit TSC value to a nanosecond value via a 96-bit
57 : "=r"(quot.w[1]), "=r"(quot.w[0]) 69 * intermediate
58 : "0"(quot.w[1]), "1"(quot.w[0]) 70 */
71 asm("mulu %2,%0,%3,%0 \n" /* LSW * mult -> 0:%3:%0 */
72 "mulu %2,%1,%2,%1 \n" /* MSW * mult -> %2:%1:0 */
73 "add %3,%1 \n"
74 "addc 0,%2 \n" /* result in %2:%1:%0 */
75 : "=r"(product[0]), "=r"(product[1]), "=r"(product[2]), "=r"(tmp)
76 : "0"(tsc64.l[0]), "1"(tsc64.l[1]), "2"(sched_clock_multiplier)
59 : "cc"); 77 : "cc");
60 78
61 do_div(quot.l, MN10300_TSCCLK); 79 result.l[0] = product[1] << 16 | product[0] >> 16;
80 result.l[1] = product[2] << 16 | product[1] >> 16;
62 81
63 return quot.l; 82 return result.ll;
83}
84
85/*
86 * initialise the scheduler clock
87 */
88static void __init mn10300_sched_clock_init(void)
89{
90 sched_clock_multiplier =
91 __muldiv64u(NSEC_PER_SEC, 1 << 16, MN10300_TSCCLK);
64} 92}
65 93
66/* 94/*
@@ -128,4 +156,6 @@ void __init time_init(void)
128 /* start the watchdog timer */ 156 /* start the watchdog timer */
129 watchdog_go(); 157 watchdog_go();
130#endif 158#endif
159
160 mn10300_sched_clock_init();
131} 161}
diff --git a/arch/mn10300/unit-asb2303/unit-init.c b/arch/mn10300/unit-asb2303/unit-init.c
index 14b2c817cff8..70e8cb4ea266 100644
--- a/arch/mn10300/unit-asb2303/unit-init.c
+++ b/arch/mn10300/unit-asb2303/unit-init.c
@@ -51,7 +51,7 @@ void __init unit_init_IRQ(void)
51 switch (GET_XIRQ_TRIGGER(extnum)) { 51 switch (GET_XIRQ_TRIGGER(extnum)) {
52 case XIRQ_TRIGGER_HILEVEL: 52 case XIRQ_TRIGGER_HILEVEL:
53 case XIRQ_TRIGGER_LOWLEVEL: 53 case XIRQ_TRIGGER_LOWLEVEL:
54 set_irq_handler(XIRQ2IRQ(extnum), handle_level_irq); 54 set_intr_postackable(XIRQ2IRQ(extnum));
55 break; 55 break;
56 default: 56 default:
57 break; 57 break;
diff --git a/arch/mn10300/unit-asb2305/unit-init.c b/arch/mn10300/unit-asb2305/unit-init.c
index 6a352414a358..72812a9439ac 100644
--- a/arch/mn10300/unit-asb2305/unit-init.c
+++ b/arch/mn10300/unit-asb2305/unit-init.c
@@ -52,7 +52,7 @@ void __init unit_init_IRQ(void)
52 switch (GET_XIRQ_TRIGGER(extnum)) { 52 switch (GET_XIRQ_TRIGGER(extnum)) {
53 case XIRQ_TRIGGER_HILEVEL: 53 case XIRQ_TRIGGER_HILEVEL:
54 case XIRQ_TRIGGER_LOWLEVEL: 54 case XIRQ_TRIGGER_LOWLEVEL:
55 set_irq_handler(XIRQ2IRQ(extnum), handle_level_irq); 55 set_intr_postackable(XIRQ2IRQ(extnum));
56 break; 56 break;
57 default: 57 default:
58 break; 58 break;
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 717a3bc1352e..65d1a8454d2c 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -195,7 +195,7 @@ image-$(CONFIG_PPC_CELLEB) += zImage.pseries
195image-$(CONFIG_PPC_CHRP) += zImage.chrp 195image-$(CONFIG_PPC_CHRP) += zImage.chrp
196image-$(CONFIG_PPC_EFIKA) += zImage.chrp 196image-$(CONFIG_PPC_EFIKA) += zImage.chrp
197image-$(CONFIG_PPC_PMAC) += zImage.pmac 197image-$(CONFIG_PPC_PMAC) += zImage.pmac
198image-$(CONFIG_PPC_HOLLY) += zImage.holly 198image-$(CONFIG_PPC_HOLLY) += dtbImage.holly
199image-$(CONFIG_PPC_PRPMC2800) += dtbImage.prpmc2800 199image-$(CONFIG_PPC_PRPMC2800) += dtbImage.prpmc2800
200image-$(CONFIG_PPC_ISERIES) += zImage.iseries 200image-$(CONFIG_PPC_ISERIES) += zImage.iseries
201image-$(CONFIG_DEFAULT_UIMAGE) += uImage 201image-$(CONFIG_DEFAULT_UIMAGE) += uImage
diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
index f87fe7b9ced9..c6e11ebecebb 100644
--- a/arch/powerpc/boot/dts/holly.dts
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -133,61 +133,61 @@
133 reg = <0x00007400 0x00000400>; 133 reg = <0x00007400 0x00000400>;
134 big-endian; 134 big-endian;
135 }; 135 };
136 };
136 137
137 pci@1000 { 138 pci@c0001000 {
138 device_type = "pci"; 139 device_type = "pci";
139 compatible = "tsi109-pci", "tsi108-pci"; 140 compatible = "tsi109-pci", "tsi108-pci";
140 #interrupt-cells = <1>; 141 #interrupt-cells = <1>;
141 #size-cells = <2>; 142 #size-cells = <2>;
142 #address-cells = <3>; 143 #address-cells = <3>;
143 reg = <0x00001000 0x00001000>; 144 reg = <0xc0001000 0x00001000>;
144 bus-range = <0x0 0x0>; 145 bus-range = <0x0 0x0>;
145 /*----------------------------------------------------+ 146 /*----------------------------------------------------+
146 | PCI memory range. 147 | PCI memory range.
147 | 01 denotes I/O space 148 | 01 denotes I/O space
148 | 02 denotes 32-bit memory space 149 | 02 denotes 32-bit memory space
149 +----------------------------------------------------*/ 150 +----------------------------------------------------*/
150 ranges = <0x02000000 0x00000000 0x40000000 0x40000000 0x00000000 0x10000000 151 ranges = <0x02000000 0x00000000 0x40000000 0x40000000 0x00000000 0x10000000
151 0x01000000 0x00000000 0x00000000 0x7e000000 0x00000000 0x00010000>; 152 0x01000000 0x00000000 0x00000000 0x7e000000 0x00000000 0x00010000>;
152 clock-frequency = <133333332>; 153 clock-frequency = <133333332>;
153 interrupt-parent = <&MPIC>; 154 interrupt-parent = <&MPIC>;
155 interrupts = <0x17 0x2>;
156 interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
157 /*----------------------------------------------------+
158 | The INTA, INTB, INTC, INTD are shared.
159 +----------------------------------------------------*/
160 interrupt-map = <
161 0x800 0x0 0x0 0x1 &RT0 0x24 0x0
162 0x800 0x0 0x0 0x2 &RT0 0x25 0x0
163 0x800 0x0 0x0 0x3 &RT0 0x26 0x0
164 0x800 0x0 0x0 0x4 &RT0 0x27 0x0
165
166 0x1000 0x0 0x0 0x1 &RT0 0x25 0x0
167 0x1000 0x0 0x0 0x2 &RT0 0x26 0x0
168 0x1000 0x0 0x0 0x3 &RT0 0x27 0x0
169 0x1000 0x0 0x0 0x4 &RT0 0x24 0x0
170
171 0x1800 0x0 0x0 0x1 &RT0 0x26 0x0
172 0x1800 0x0 0x0 0x2 &RT0 0x27 0x0
173 0x1800 0x0 0x0 0x3 &RT0 0x24 0x0
174 0x1800 0x0 0x0 0x4 &RT0 0x25 0x0
175
176 0x2000 0x0 0x0 0x1 &RT0 0x27 0x0
177 0x2000 0x0 0x0 0x2 &RT0 0x24 0x0
178 0x2000 0x0 0x0 0x3 &RT0 0x25 0x0
179 0x2000 0x0 0x0 0x4 &RT0 0x26 0x0
180 >;
181
182 RT0: router@1180 {
183 device_type = "pic-router";
184 interrupt-controller;
185 big-endian;
186 clock-frequency = <0>;
187 #address-cells = <0>;
188 #interrupt-cells = <2>;
154 interrupts = <0x17 0x2>; 189 interrupts = <0x17 0x2>;
155 interrupt-map-mask = <0xf800 0x0 0x0 0x7>; 190 interrupt-parent = <&MPIC>;
156 /*----------------------------------------------------+
157 | The INTA, INTB, INTC, INTD are shared.
158 +----------------------------------------------------*/
159 interrupt-map = <
160 0x800 0x0 0x0 0x1 &RT0 0x24 0x0
161 0x800 0x0 0x0 0x2 &RT0 0x25 0x0
162 0x800 0x0 0x0 0x3 &RT0 0x26 0x0
163 0x800 0x0 0x0 0x4 &RT0 0x27 0x0
164
165 0x1000 0x0 0x0 0x1 &RT0 0x25 0x0
166 0x1000 0x0 0x0 0x2 &RT0 0x26 0x0
167 0x1000 0x0 0x0 0x3 &RT0 0x27 0x0
168 0x1000 0x0 0x0 0x4 &RT0 0x24 0x0
169
170 0x1800 0x0 0x0 0x1 &RT0 0x26 0x0
171 0x1800 0x0 0x0 0x2 &RT0 0x27 0x0
172 0x1800 0x0 0x0 0x3 &RT0 0x24 0x0
173 0x1800 0x0 0x0 0x4 &RT0 0x25 0x0
174
175 0x2000 0x0 0x0 0x1 &RT0 0x27 0x0
176 0x2000 0x0 0x0 0x2 &RT0 0x24 0x0
177 0x2000 0x0 0x0 0x3 &RT0 0x25 0x0
178 0x2000 0x0 0x0 0x4 &RT0 0x26 0x0
179 >;
180
181 RT0: router@1180 {
182 device_type = "pic-router";
183 interrupt-controller;
184 big-endian;
185 clock-frequency = <0>;
186 #address-cells = <0>;
187 #interrupt-cells = <2>;
188 interrupts = <0x17 0x2>;
189 interrupt-parent = <&MPIC>;
190 };
191 }; 191 };
192 }; 192 };
193 193
diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
index 3b3a1062cb25..584a4f184eb2 100644
--- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts
+++ b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
@@ -281,7 +281,7 @@
281 cell-index = <0>; 281 cell-index = <0>;
282 reg = <0x0 0x80>; 282 reg = <0x0 0x80>;
283 interrupt-parent = <&mpic>; 283 interrupt-parent = <&mpic>;
284 interrupts = <60 2>; 284 interrupts = <76 2>;
285 }; 285 };
286 dma-channel@1 { 286 dma-channel@1 {
287 compatible = "fsl,mpc8610-dma-channel", 287 compatible = "fsl,mpc8610-dma-channel",
@@ -289,7 +289,7 @@
289 cell-index = <1>; 289 cell-index = <1>;
290 reg = <0x80 0x80>; 290 reg = <0x80 0x80>;
291 interrupt-parent = <&mpic>; 291 interrupt-parent = <&mpic>;
292 interrupts = <61 2>; 292 interrupts = <77 2>;
293 }; 293 };
294 dma-channel@2 { 294 dma-channel@2 {
295 compatible = "fsl,mpc8610-dma-channel", 295 compatible = "fsl,mpc8610-dma-channel",
@@ -297,7 +297,7 @@
297 cell-index = <2>; 297 cell-index = <2>;
298 reg = <0x100 0x80>; 298 reg = <0x100 0x80>;
299 interrupt-parent = <&mpic>; 299 interrupt-parent = <&mpic>;
300 interrupts = <62 2>; 300 interrupts = <78 2>;
301 }; 301 };
302 dma-channel@3 { 302 dma-channel@3 {
303 compatible = "fsl,mpc8610-dma-channel", 303 compatible = "fsl,mpc8610-dma-channel",
@@ -305,7 +305,7 @@
305 cell-index = <3>; 305 cell-index = <3>;
306 reg = <0x180 0x80>; 306 reg = <0x180 0x80>;
307 interrupt-parent = <&mpic>; 307 interrupt-parent = <&mpic>;
308 interrupts = <63 2>; 308 interrupts = <79 2>;
309 }; 309 };
310 }; 310 };
311 311
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 80d1f399ee51..64c6ee22eefd 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -409,6 +409,13 @@ do { \
409/* Keep this the last entry. */ 409/* Keep this the last entry. */
410#define R_PPC64_NUM 107 410#define R_PPC64_NUM 107
411 411
412/* There's actually a third entry here, but it's unused */
413struct ppc64_opd_entry
414{
415 unsigned long funcaddr;
416 unsigned long r2;
417};
418
412#ifdef __KERNEL__ 419#ifdef __KERNEL__
413 420
414#ifdef CONFIG_SPU_BASE 421#ifdef CONFIG_SPU_BASE
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 7710e9e6660f..07956f3e7844 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -2,6 +2,8 @@
2#define _ASM_POWERPC_SECTIONS_H 2#define _ASM_POWERPC_SECTIONS_H
3#ifdef __KERNEL__ 3#ifdef __KERNEL__
4 4
5#include <linux/elf.h>
6#include <linux/uaccess.h>
5#include <asm-generic/sections.h> 7#include <asm-generic/sections.h>
6 8
7#ifdef __powerpc64__ 9#ifdef __powerpc64__
@@ -17,7 +19,15 @@ static inline int in_kernel_text(unsigned long addr)
17} 19}
18 20
19#undef dereference_function_descriptor 21#undef dereference_function_descriptor
20void *dereference_function_descriptor(void *); 22static inline void *dereference_function_descriptor(void *ptr)
23{
24 struct ppc64_opd_entry *desc = ptr;
25 void *p;
26
27 if (!probe_kernel_address(&desc->funcaddr, p))
28 ptr = p;
29 return ptr;
30}
21 31
22#endif 32#endif
23 33
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index d308a9f70f1b..31982d05d81a 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -34,11 +34,7 @@
34#include <asm/smp.h> 34#include <asm/smp.h>
35 35
36#ifdef CONFIG_HOTPLUG_CPU 36#ifdef CONFIG_HOTPLUG_CPU
37/* this is used for software suspend, and that shuts down 37#define cpu_should_die() cpu_is_offline(smp_processor_id())
38 * CPUs even while the system is still booting... */
39#define cpu_should_die() (cpu_is_offline(smp_processor_id()) && \
40 (system_state == SYSTEM_RUNNING \
41 || system_state == SYSTEM_BOOTING))
42#else 38#else
43#define cpu_should_die() 0 39#define cpu_should_die() 0
44#endif 40#endif
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index b4fdf2f2743c..fe8f71dd0b3f 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -347,9 +347,8 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
347 linux_regs->msr |= MSR_SE; 347 linux_regs->msr |= MSR_SE;
348#endif 348#endif
349 kgdb_single_step = 1; 349 kgdb_single_step = 1;
350 if (kgdb_contthread) 350 atomic_set(&kgdb_cpu_doing_single_step,
351 atomic_set(&kgdb_cpu_doing_single_step, 351 raw_smp_processor_id());
352 raw_smp_processor_id());
353 } 352 }
354 return 0; 353 return 0;
355 } 354 }
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index ad79de272ff3..1af2377e4992 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -21,9 +21,7 @@
21#include <linux/err.h> 21#include <linux/err.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/bug.h> 23#include <linux/bug.h>
24#include <linux/uaccess.h>
25#include <asm/module.h> 24#include <asm/module.h>
26#include <asm/sections.h>
27#include <asm/firmware.h> 25#include <asm/firmware.h>
28#include <asm/code-patching.h> 26#include <asm/code-patching.h>
29#include <linux/sort.h> 27#include <linux/sort.h>
@@ -43,13 +41,6 @@
43#define DEBUGP(fmt , ...) 41#define DEBUGP(fmt , ...)
44#endif 42#endif
45 43
46/* There's actually a third entry here, but it's unused */
47struct ppc64_opd_entry
48{
49 unsigned long funcaddr;
50 unsigned long r2;
51};
52
53/* Like PPC32, we need little trampolines to do > 24-bit jumps (into 44/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
54 the kernel itself). But on PPC64, these need to be used for every 45 the kernel itself). But on PPC64, these need to be used for every
55 jump, actually, to reset r2 (TOC+0x8000). */ 46 jump, actually, to reset r2 (TOC+0x8000). */
@@ -452,13 +443,3 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
452 443
453 return 0; 444 return 0;
454} 445}
455
456void *dereference_function_descriptor(void *ptr)
457{
458 struct ppc64_opd_entry *desc = ptr;
459 void *p;
460
461 if (!probe_kernel_address(&desc->funcaddr, p))
462 ptr = p;
463 return ptr;
464}
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index ef74a0763ec1..8c619963becc 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -219,11 +219,21 @@ static void __devinit quirk_final_uli5249(struct pci_dev *dev)
219 int i; 219 int i;
220 u8 *dummy; 220 u8 *dummy;
221 struct pci_bus *bus = dev->bus; 221 struct pci_bus *bus = dev->bus;
222 resource_size_t end = 0;
223
224 for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCES+3; i++) {
225 unsigned long flags = pci_resource_flags(dev, i);
226 if ((flags & (IORESOURCE_MEM|IORESOURCE_PREFETCH)) == IORESOURCE_MEM)
227 end = pci_resource_end(dev, i);
228 }
222 229
223 for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) { 230 for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
224 if ((bus->resource[i]) && 231 if ((bus->resource[i]) &&
225 (bus->resource[i]->flags & IORESOURCE_MEM)) { 232 (bus->resource[i]->flags & IORESOURCE_MEM)) {
226 dummy = ioremap(bus->resource[i]->end - 3, 0x4); 233 if (bus->resource[i]->end == end)
234 dummy = ioremap(bus->resource[i]->start, 0x4);
235 else
236 dummy = ioremap(bus->resource[i]->end - 3, 0x4);
227 if (dummy) { 237 if (dummy) {
228 in_8(dummy); 238 in_8(dummy);
229 iounmap(dummy); 239 iounmap(dummy);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ca114fe46ffb..06acb1a18bbc 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -169,6 +169,8 @@ void init_cpu_timer(void)
169 169
170static void clock_comparator_interrupt(__u16 code) 170static void clock_comparator_interrupt(__u16 code)
171{ 171{
172 if (S390_lowcore.clock_comparator == -1ULL)
173 set_clock_comparator(S390_lowcore.clock_comparator);
172} 174}
173 175
174static void etr_timing_alert(struct etr_irq_parm *); 176static void etr_timing_alert(struct etr_irq_parm *);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index fc6ab6094df8..0953cee05efc 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -1,14 +1,9 @@
1/* 1/*
2 * arch/s390/lib/delay.c
3 * Precise Delay Loops for S390 2 * Precise Delay Loops for S390
4 * 3 *
5 * S390 version 4 * Copyright IBM Corp. 1999,2008
6 * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation 5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
7 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), 6 * Heiko Carstens <heiko.carstens@de.ibm.com>,
8 *
9 * Derived from "arch/i386/lib/delay.c"
10 * Copyright (C) 1993 Linus Torvalds
11 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
12 */ 7 */
13 8
14#include <linux/sched.h> 9#include <linux/sched.h>
@@ -29,30 +24,31 @@ void __delay(unsigned long loops)
29 asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); 24 asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
30} 25}
31 26
32/* 27static void __udelay_disabled(unsigned long usecs)
33 * Waits for 'usecs' microseconds using the TOD clock comparator.
34 */
35void __udelay(unsigned long usecs)
36{ 28{
37 u64 end, time, old_cc = 0; 29 unsigned long mask, cr0, cr0_saved;
38 unsigned long flags, cr0, mask, dummy; 30 u64 clock_saved;
39 int irq_context;
40 31
41 irq_context = in_interrupt(); 32 clock_saved = local_tick_disable();
42 if (!irq_context) 33 set_clock_comparator(get_clock() + ((u64) usecs << 12));
43 local_bh_disable(); 34 __ctl_store(cr0_saved, 0, 0);
44 local_irq_save(flags); 35 cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
45 if (raw_irqs_disabled_flags(flags)) { 36 __ctl_load(cr0 , 0, 0);
46 old_cc = local_tick_disable(); 37 mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
47 S390_lowcore.clock_comparator = -1ULL; 38 trace_hardirqs_on();
48 __ctl_store(cr0, 0, 0); 39 __load_psw_mask(mask);
49 dummy = (cr0 & 0xffff00e0) | 0x00000800; 40 local_irq_disable();
50 __ctl_load(dummy , 0, 0); 41 __ctl_load(cr0_saved, 0, 0);
51 mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; 42 local_tick_enable(clock_saved);
52 } else 43 set_clock_comparator(S390_lowcore.clock_comparator);
53 mask = psw_kernel_bits | PSW_MASK_WAIT | 44}
54 PSW_MASK_EXT | PSW_MASK_IO;
55 45
46static void __udelay_enabled(unsigned long usecs)
47{
48 unsigned long mask;
49 u64 end, time;
50
51 mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO;
56 end = get_clock() + ((u64) usecs << 12); 52 end = get_clock() + ((u64) usecs << 12);
57 do { 53 do {
58 time = end < S390_lowcore.clock_comparator ? 54 time = end < S390_lowcore.clock_comparator ?
@@ -62,13 +58,37 @@ void __udelay(unsigned long usecs)
62 __load_psw_mask(mask); 58 __load_psw_mask(mask);
63 local_irq_disable(); 59 local_irq_disable();
64 } while (get_clock() < end); 60 } while (get_clock() < end);
61 set_clock_comparator(S390_lowcore.clock_comparator);
62}
65 63
66 if (raw_irqs_disabled_flags(flags)) { 64/*
67 __ctl_load(cr0, 0, 0); 65 * Waits for 'usecs' microseconds using the TOD clock comparator.
68 local_tick_enable(old_cc); 66 */
67void __udelay(unsigned long usecs)
68{
69 unsigned long flags;
70
71 preempt_disable();
72 local_irq_save(flags);
73 if (in_irq()) {
74 __udelay_disabled(usecs);
75 goto out;
76 }
77 if (in_softirq()) {
78 if (raw_irqs_disabled_flags(flags))
79 __udelay_disabled(usecs);
80 else
81 __udelay_enabled(usecs);
82 goto out;
69 } 83 }
70 if (!irq_context) 84 if (raw_irqs_disabled_flags(flags)) {
85 local_bh_disable();
86 __udelay_disabled(usecs);
71 _local_bh_enable(); 87 _local_bh_enable();
72 set_clock_comparator(S390_lowcore.clock_comparator); 88 goto out;
89 }
90 __udelay_enabled(usecs);
91out:
73 local_irq_restore(flags); 92 local_irq_restore(flags);
93 preempt_enable();
74} 94}
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 23963882bc18..7495bc774685 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/linkage.h>
10#include <linux/ptrace.h> 11#include <linux/ptrace.h>
11#include <linux/errno.h> 12#include <linux/errno.h>
12#include <linux/kernel_stat.h> 13#include <linux/kernel_stat.h>
@@ -866,7 +867,7 @@ static void kill_prom_timer(void)
866 : "g1", "g2"); 867 : "g1", "g2");
867} 868}
868 869
869void init_irqwork_curcpu(void) 870void notrace init_irqwork_curcpu(void)
870{ 871{
871 int cpu = hard_smp_processor_id(); 872 int cpu = hard_smp_processor_id();
872 873
@@ -897,7 +898,7 @@ static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type
897 } 898 }
898} 899}
899 900
900void __cpuinit sun4v_register_mondo_queues(int this_cpu) 901void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu)
901{ 902{
902 struct trap_per_cpu *tb = &trap_block[this_cpu]; 903 struct trap_per_cpu *tb = &trap_block[this_cpu];
903 904
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index f845f150f565..100ebd527499 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -169,7 +169,7 @@ static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long fla
169 169
170static int of_bus_pci_match(struct device_node *np) 170static int of_bus_pci_match(struct device_node *np)
171{ 171{
172 if (!strcmp(np->type, "pci") || !strcmp(np->type, "pciex")) { 172 if (!strcmp(np->name, "pci")) {
173 const char *model = of_get_property(np, "model", NULL); 173 const char *model = of_get_property(np, "model", NULL);
174 174
175 if (model && !strcmp(model, "SUNW,simba")) 175 if (model && !strcmp(model, "SUNW,simba"))
@@ -200,7 +200,7 @@ static int of_bus_simba_match(struct device_node *np)
200 /* Treat PCI busses lacking ranges property just like 200 /* Treat PCI busses lacking ranges property just like
201 * simba. 201 * simba.
202 */ 202 */
203 if (!strcmp(np->type, "pci") || !strcmp(np->type, "pciex")) { 203 if (!strcmp(np->name, "pci")) {
204 if (!of_find_property(np, "ranges", NULL)) 204 if (!of_find_property(np, "ranges", NULL))
205 return 1; 205 return 1;
206 } 206 }
@@ -429,7 +429,7 @@ static int __init use_1to1_mapping(struct device_node *pp)
429 * it lacks a ranges property, and this will include 429 * it lacks a ranges property, and this will include
430 * cases like Simba. 430 * cases like Simba.
431 */ 431 */
432 if (!strcmp(pp->type, "pci") || !strcmp(pp->type, "pciex")) 432 if (!strcmp(pp->name, "pci"))
433 return 0; 433 return 0;
434 434
435 return 1; 435 return 1;
@@ -714,8 +714,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
714 break; 714 break;
715 } 715 }
716 } else { 716 } else {
717 if (!strcmp(pp->type, "pci") || 717 if (!strcmp(pp->name, "pci")) {
718 !strcmp(pp->type, "pciex")) {
719 unsigned int this_orig_irq = irq; 718 unsigned int this_orig_irq = irq;
720 719
721 irq = pci_irq_swizzle(dp, pp, irq); 720 irq = pci_irq_swizzle(dp, pp, irq);
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
index 55096195458f..80dad76f8b81 100644
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -425,7 +425,7 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
425 dev->current_state = 4; /* unknown power state */ 425 dev->current_state = 4; /* unknown power state */
426 dev->error_state = pci_channel_io_normal; 426 dev->error_state = pci_channel_io_normal;
427 427
428 if (!strcmp(type, "pci") || !strcmp(type, "pciex")) { 428 if (!strcmp(node->name, "pci")) {
429 /* a PCI-PCI bridge */ 429 /* a PCI-PCI bridge */
430 dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; 430 dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
431 dev->rom_base_reg = PCI_ROM_ADDRESS1; 431 dev->rom_base_reg = PCI_ROM_ADDRESS1;
diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c
index e205ade69cfc..f85b6bebb0be 100644
--- a/arch/sparc64/kernel/pci_psycho.c
+++ b/arch/sparc64/kernel/pci_psycho.c
@@ -575,7 +575,7 @@ static irqreturn_t psycho_pcierr_intr_other(struct pci_pbm_info *pbm, int is_pbm
575{ 575{
576 unsigned long csr_reg, csr, csr_error_bits; 576 unsigned long csr_reg, csr, csr_error_bits;
577 irqreturn_t ret = IRQ_NONE; 577 irqreturn_t ret = IRQ_NONE;
578 u16 stat; 578 u16 stat, *addr;
579 579
580 if (is_pbm_a) { 580 if (is_pbm_a) {
581 csr_reg = pbm->controller_regs + PSYCHO_PCIA_CTRL; 581 csr_reg = pbm->controller_regs + PSYCHO_PCIA_CTRL;
@@ -597,7 +597,9 @@ static irqreturn_t psycho_pcierr_intr_other(struct pci_pbm_info *pbm, int is_pbm
597 printk("%s: PCI SERR signal asserted.\n", pbm->name); 597 printk("%s: PCI SERR signal asserted.\n", pbm->name);
598 ret = IRQ_HANDLED; 598 ret = IRQ_HANDLED;
599 } 599 }
600 pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat); 600 addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
601 0, PCI_STATUS);
602 pci_config_read16(addr, &stat);
601 if (stat & (PCI_STATUS_PARITY | 603 if (stat & (PCI_STATUS_PARITY |
602 PCI_STATUS_SIG_TARGET_ABORT | 604 PCI_STATUS_SIG_TARGET_ABORT |
603 PCI_STATUS_REC_TARGET_ABORT | 605 PCI_STATUS_REC_TARGET_ABORT |
@@ -605,7 +607,7 @@ static irqreturn_t psycho_pcierr_intr_other(struct pci_pbm_info *pbm, int is_pbm
605 PCI_STATUS_SIG_SYSTEM_ERROR)) { 607 PCI_STATUS_SIG_SYSTEM_ERROR)) {
606 printk("%s: PCI bus error, PCI_STATUS[%04x]\n", 608 printk("%s: PCI bus error, PCI_STATUS[%04x]\n",
607 pbm->name, stat); 609 pbm->name, stat);
608 pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff); 610 pci_config_write16(addr, 0xffff);
609 ret = IRQ_HANDLED; 611 ret = IRQ_HANDLED;
610 } 612 }
611 return ret; 613 return ret;
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 3d924121c796..c824df13f589 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/linkage.h>
13#include <linux/kernel.h> 14#include <linux/kernel.h>
14#include <linux/signal.h> 15#include <linux/signal.h>
15#include <linux/smp.h> 16#include <linux/smp.h>
@@ -2453,7 +2454,7 @@ struct trap_per_cpu trap_block[NR_CPUS];
2453/* This can get invoked before sched_init() so play it super safe 2454/* This can get invoked before sched_init() so play it super safe
2454 * and use hard_smp_processor_id(). 2455 * and use hard_smp_processor_id().
2455 */ 2456 */
2456void init_cur_cpu_trap(struct thread_info *t) 2457void notrace init_cur_cpu_trap(struct thread_info *t)
2457{ 2458{
2458 int cpu = hard_smp_processor_id(); 2459 int cpu = hard_smp_processor_id();
2459 struct trap_per_cpu *p = &trap_block[cpu]; 2460 struct trap_per_cpu *p = &trap_block[cpu];
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 39fd3f42696d..0d7cdbbfc1ee 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@ config X86
29 select HAVE_FTRACE 29 select HAVE_FTRACE
30 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 30 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
31 select HAVE_ARCH_KGDB if !X86_VOYAGER 31 select HAVE_ARCH_KGDB if !X86_VOYAGER
32 select HAVE_ARCH_TRACEHOOK
32 select HAVE_GENERIC_DMA_COHERENT if X86_32 33 select HAVE_GENERIC_DMA_COHERENT if X86_32
33 select HAVE_EFFICIENT_UNALIGNED_ACCESS 34 select HAVE_EFFICIENT_UNALIGNED_ACCESS
34 35
@@ -1021,7 +1022,7 @@ config HAVE_ARCH_ALLOC_REMAP
1021 1022
1022config ARCH_FLATMEM_ENABLE 1023config ARCH_FLATMEM_ENABLE
1023 def_bool y 1024 def_bool y
1024 depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA 1025 depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
1025 1026
1026config ARCH_DISCONTIGMEM_ENABLE 1027config ARCH_DISCONTIGMEM_ENABLE
1027 def_bool y 1028 def_bool y
@@ -1037,7 +1038,7 @@ config ARCH_SPARSEMEM_DEFAULT
1037 1038
1038config ARCH_SPARSEMEM_ENABLE 1039config ARCH_SPARSEMEM_ENABLE
1039 def_bool y 1040 def_bool y
1040 depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) 1041 depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
1041 select SPARSEMEM_STATIC if X86_32 1042 select SPARSEMEM_STATIC if X86_32
1042 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 1043 select SPARSEMEM_VMEMMAP_ENABLE if X86_64
1043 1044
@@ -1118,10 +1119,10 @@ config MTRR
1118 You can safely say Y even if your machine doesn't have MTRRs, you'll 1119 You can safely say Y even if your machine doesn't have MTRRs, you'll
1119 just add about 9 KB to your kernel. 1120 just add about 9 KB to your kernel.
1120 1121
1121 See <file:Documentation/mtrr.txt> for more information. 1122 See <file:Documentation/x86/mtrr.txt> for more information.
1122 1123
1123config MTRR_SANITIZER 1124config MTRR_SANITIZER
1124 bool 1125 def_bool y
1125 prompt "MTRR cleanup support" 1126 prompt "MTRR cleanup support"
1126 depends on MTRR 1127 depends on MTRR
1127 help 1128 help
@@ -1132,7 +1133,7 @@ config MTRR_SANITIZER
1132 The largest mtrr entry size for a continous block can be set with 1133 The largest mtrr entry size for a continous block can be set with
1133 mtrr_chunk_size. 1134 mtrr_chunk_size.
1134 1135
1135 If unsure, say N. 1136 If unsure, say Y.
1136 1137
1137config MTRR_SANITIZER_ENABLE_DEFAULT 1138config MTRR_SANITIZER_ENABLE_DEFAULT
1138 int "MTRR cleanup enable value (0-1)" 1139 int "MTRR cleanup enable value (0-1)"
@@ -1192,7 +1193,6 @@ config IRQBALANCE
1192config SECCOMP 1193config SECCOMP
1193 def_bool y 1194 def_bool y
1194 prompt "Enable seccomp to safely compute untrusted bytecode" 1195 prompt "Enable seccomp to safely compute untrusted bytecode"
1195 depends on PROC_FS
1196 help 1196 help
1197 This kernel feature is useful for number crunching applications 1197 This kernel feature is useful for number crunching applications
1198 that may need to compute untrusted bytecode during their 1198 that may need to compute untrusted bytecode during their
@@ -1200,7 +1200,7 @@ config SECCOMP
1200 the process as file descriptors supporting the read/write 1200 the process as file descriptors supporting the read/write
1201 syscalls, it's possible to isolate those applications in 1201 syscalls, it's possible to isolate those applications in
1202 their own address space using seccomp. Once seccomp is 1202 their own address space using seccomp. Once seccomp is
1203 enabled via /proc/<pid>/seccomp, it cannot be disabled 1203 enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
1204 and the task is only allowed to execute a few safe syscalls 1204 and the task is only allowed to execute a few safe syscalls
1205 defined by each seccomp mode. 1205 defined by each seccomp mode.
1206 1206
@@ -1357,14 +1357,14 @@ config PHYSICAL_ALIGN
1357 Don't change this unless you know what you are doing. 1357 Don't change this unless you know what you are doing.
1358 1358
1359config HOTPLUG_CPU 1359config HOTPLUG_CPU
1360 bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" 1360 bool "Support for hot-pluggable CPUs"
1361 depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER 1361 depends on SMP && HOTPLUG && !X86_VOYAGER
1362 ---help--- 1362 ---help---
1363 Say Y here to experiment with turning CPUs off and on, and to 1363 Say Y here to allow turning CPUs off and on. CPUs can be
1364 enable suspend on SMP systems. CPUs can be controlled through 1364 controlled through /sys/devices/system/cpu.
1365 /sys/devices/system/cpu. 1365 ( Note: power management support will enable this option
1366 Say N if you want to disable CPU hotplug and don't need to 1366 automatically on SMP systems. )
1367 suspend. 1367 Say N if you want to disable CPU hotplug.
1368 1368
1369config COMPAT_VDSO 1369config COMPAT_VDSO
1370 def_bool y 1370 def_bool y
@@ -1379,6 +1379,51 @@ config COMPAT_VDSO
1379 1379
1380 If unsure, say Y. 1380 If unsure, say Y.
1381 1381
1382config CMDLINE_BOOL
1383 bool "Built-in kernel command line"
1384 default n
1385 help
1386 Allow for specifying boot arguments to the kernel at
1387 build time. On some systems (e.g. embedded ones), it is
1388 necessary or convenient to provide some or all of the
1389 kernel boot arguments with the kernel itself (that is,
1390 to not rely on the boot loader to provide them.)
1391
1392 To compile command line arguments into the kernel,
1393 set this option to 'Y', then fill in the
1394 the boot arguments in CONFIG_CMDLINE.
1395
1396 Systems with fully functional boot loaders (i.e. non-embedded)
1397 should leave this option set to 'N'.
1398
1399config CMDLINE
1400 string "Built-in kernel command string"
1401 depends on CMDLINE_BOOL
1402 default ""
1403 help
1404 Enter arguments here that should be compiled into the kernel
1405 image and used at boot time. If the boot loader provides a
1406 command line at boot time, it is appended to this string to
1407 form the full kernel command line, when the system boots.
1408
1409 However, you can use the CONFIG_CMDLINE_OVERRIDE option to
1410 change this behavior.
1411
1412 In most cases, the command line (whether built-in or provided
1413 by the boot loader) should specify the device for the root
1414 file system.
1415
1416config CMDLINE_OVERRIDE
1417 bool "Built-in command line overrides boot loader arguments"
1418 default n
1419 depends on CMDLINE_BOOL
1420 help
1421 Set this option to 'Y' to have the kernel ignore the boot loader
1422 command line, and use ONLY the built-in command line.
1423
1424 This is used to work around broken boot loaders. This should
1425 be set to 'N' under normal conditions.
1426
1382endmenu 1427endmenu
1383 1428
1384config ARCH_ENABLE_MEMORY_HOTPLUG 1429config ARCH_ENABLE_MEMORY_HOTPLUG
@@ -1774,7 +1819,7 @@ config COMPAT_FOR_U64_ALIGNMENT
1774 1819
1775config SYSVIPC_COMPAT 1820config SYSVIPC_COMPAT
1776 def_bool y 1821 def_bool y
1777 depends on X86_64 && COMPAT && SYSVIPC 1822 depends on COMPAT && SYSVIPC
1778 1823
1779endmenu 1824endmenu
1780 1825
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b225219c448c..60a85768cfcb 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -418,3 +418,21 @@ config X86_MINIMUM_CPU_FAMILY
418config X86_DEBUGCTLMSR 418config X86_DEBUGCTLMSR
419 def_bool y 419 def_bool y
420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) 420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
421
422config X86_DS
423 bool "Debug Store support"
424 default y
425 help
426 Add support for Debug Store.
427 This allows the kernel to provide a memory buffer to the hardware
428 to store various profiling and tracing events.
429
430config X86_PTRACE_BTS
431 bool "ptrace interface to Branch Trace Store"
432 default y
433 depends on (X86_DS && X86_DEBUGCTLMSR)
434 help
435 Add a ptrace interface to allow collecting an execution trace
436 of the traced task.
437 This collects control flow changes in a (cyclic) buffer and allows
438 debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index ba7736cf2ec7..29c5fbf08392 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -137,14 +137,15 @@ relocated:
137 */ 137 */
138 movl output_len(%ebx), %eax 138 movl output_len(%ebx), %eax
139 pushl %eax 139 pushl %eax
140 # push arguments for decompress_kernel:
140 pushl %ebp # output address 141 pushl %ebp # output address
141 movl input_len(%ebx), %eax 142 movl input_len(%ebx), %eax
142 pushl %eax # input_len 143 pushl %eax # input_len
143 leal input_data(%ebx), %eax 144 leal input_data(%ebx), %eax
144 pushl %eax # input_data 145 pushl %eax # input_data
145 leal boot_heap(%ebx), %eax 146 leal boot_heap(%ebx), %eax
146 pushl %eax # heap area as third argument 147 pushl %eax # heap area
147 pushl %esi # real mode pointer as second arg 148 pushl %esi # real mode pointer
148 call decompress_kernel 149 call decompress_kernel
149 addl $20, %esp 150 addl $20, %esp
150 popl %ecx 151 popl %ecx
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9fea73706479..5780d361105b 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -16,7 +16,7 @@
16 */ 16 */
17#undef CONFIG_PARAVIRT 17#undef CONFIG_PARAVIRT
18#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
19#define _ASM_DESC_H_ 1 19#define ASM_X86__DESC_H 1
20#endif 20#endif
21 21
22#ifdef CONFIG_X86_64 22#ifdef CONFIG_X86_64
@@ -27,7 +27,7 @@
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <linux/screen_info.h> 28#include <linux/screen_info.h>
29#include <linux/elf.h> 29#include <linux/elf.h>
30#include <asm/io.h> 30#include <linux/io.h>
31#include <asm/page.h> 31#include <asm/page.h>
32#include <asm/boot.h> 32#include <asm/boot.h>
33#include <asm/bootparam.h> 33#include <asm/bootparam.h>
@@ -251,7 +251,7 @@ static void __putstr(int error, const char *s)
251 y--; 251 y--;
252 } 252 }
253 } else { 253 } else {
254 vidmem [(x + cols * y) * 2] = c; 254 vidmem[(x + cols * y) * 2] = c;
255 if (++x >= cols) { 255 if (++x >= cols) {
256 x = 0; 256 x = 0;
257 if (++y >= lines) { 257 if (++y >= lines) {
@@ -277,7 +277,8 @@ static void *memset(void *s, int c, unsigned n)
277 int i; 277 int i;
278 char *ss = s; 278 char *ss = s;
279 279
280 for (i = 0; i < n; i++) ss[i] = c; 280 for (i = 0; i < n; i++)
281 ss[i] = c;
281 return s; 282 return s;
282} 283}
283 284
@@ -287,7 +288,8 @@ static void *memcpy(void *dest, const void *src, unsigned n)
287 const char *s = src; 288 const char *s = src;
288 char *d = dest; 289 char *d = dest;
289 290
290 for (i = 0; i < n; i++) d[i] = s[i]; 291 for (i = 0; i < n; i++)
292 d[i] = s[i];
291 return dest; 293 return dest;
292} 294}
293 295
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index a1310c52fc0c..857e492c571e 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -492,7 +492,7 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
492 continue; 492 continue;
493 } 493 }
494 sh_symtab = sec_symtab->symtab; 494 sh_symtab = sec_symtab->symtab;
495 sym_strtab = sec->link->strtab; 495 sym_strtab = sec_symtab->link->strtab;
496 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { 496 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
497 Elf32_Rel *rel; 497 Elf32_Rel *rel;
498 Elf32_Sym *sym; 498 Elf32_Sym *sym;
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index af86e431acfa..b993062e9a5f 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -30,7 +30,6 @@ SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
30SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */ 30SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
31 /* to be loaded */ 31 /* to be loaded */
32ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */ 32ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
33SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */
34 33
35#ifndef SVGA_MODE 34#ifndef SVGA_MODE
36#define SVGA_MODE ASK_VGA 35#define SVGA_MODE ASK_VGA
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 104275e191a8..ef9a52005ec9 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.27-rc4 3# Linux kernel version: 2.6.27-rc5
4# Mon Aug 25 15:04:00 2008 4# Wed Sep 3 17:23:09 2008
5# 5#
6# CONFIG_64BIT is not set 6# CONFIG_64BIT is not set
7CONFIG_X86_32=y 7CONFIG_X86_32=y
@@ -202,7 +202,7 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
202# CONFIG_M586 is not set 202# CONFIG_M586 is not set
203# CONFIG_M586TSC is not set 203# CONFIG_M586TSC is not set
204# CONFIG_M586MMX is not set 204# CONFIG_M586MMX is not set
205# CONFIG_M686 is not set 205CONFIG_M686=y
206# CONFIG_MPENTIUMII is not set 206# CONFIG_MPENTIUMII is not set
207# CONFIG_MPENTIUMIII is not set 207# CONFIG_MPENTIUMIII is not set
208# CONFIG_MPENTIUMM is not set 208# CONFIG_MPENTIUMM is not set
@@ -221,13 +221,14 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
221# CONFIG_MVIAC3_2 is not set 221# CONFIG_MVIAC3_2 is not set
222# CONFIG_MVIAC7 is not set 222# CONFIG_MVIAC7 is not set
223# CONFIG_MPSC is not set 223# CONFIG_MPSC is not set
224CONFIG_MCORE2=y 224# CONFIG_MCORE2 is not set
225# CONFIG_GENERIC_CPU is not set 225# CONFIG_GENERIC_CPU is not set
226CONFIG_X86_GENERIC=y 226CONFIG_X86_GENERIC=y
227CONFIG_X86_CPU=y 227CONFIG_X86_CPU=y
228CONFIG_X86_CMPXCHG=y 228CONFIG_X86_CMPXCHG=y
229CONFIG_X86_L1_CACHE_SHIFT=7 229CONFIG_X86_L1_CACHE_SHIFT=7
230CONFIG_X86_XADD=y 230CONFIG_X86_XADD=y
231# CONFIG_X86_PPRO_FENCE is not set
231CONFIG_X86_WP_WORKS_OK=y 232CONFIG_X86_WP_WORKS_OK=y
232CONFIG_X86_INVLPG=y 233CONFIG_X86_INVLPG=y
233CONFIG_X86_BSWAP=y 234CONFIG_X86_BSWAP=y
@@ -235,14 +236,15 @@ CONFIG_X86_POPAD_OK=y
235CONFIG_X86_INTEL_USERCOPY=y 236CONFIG_X86_INTEL_USERCOPY=y
236CONFIG_X86_USE_PPRO_CHECKSUM=y 237CONFIG_X86_USE_PPRO_CHECKSUM=y
237CONFIG_X86_TSC=y 238CONFIG_X86_TSC=y
239CONFIG_X86_CMOV=y
238CONFIG_X86_MINIMUM_CPU_FAMILY=4 240CONFIG_X86_MINIMUM_CPU_FAMILY=4
239CONFIG_X86_DEBUGCTLMSR=y 241CONFIG_X86_DEBUGCTLMSR=y
240CONFIG_HPET_TIMER=y 242CONFIG_HPET_TIMER=y
241CONFIG_HPET_EMULATE_RTC=y 243CONFIG_HPET_EMULATE_RTC=y
242CONFIG_DMI=y 244CONFIG_DMI=y
243# CONFIG_IOMMU_HELPER is not set 245# CONFIG_IOMMU_HELPER is not set
244CONFIG_NR_CPUS=4 246CONFIG_NR_CPUS=64
245# CONFIG_SCHED_SMT is not set 247CONFIG_SCHED_SMT=y
246CONFIG_SCHED_MC=y 248CONFIG_SCHED_MC=y
247# CONFIG_PREEMPT_NONE is not set 249# CONFIG_PREEMPT_NONE is not set
248CONFIG_PREEMPT_VOLUNTARY=y 250CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +256,8 @@ CONFIG_VM86=y
254# CONFIG_TOSHIBA is not set 256# CONFIG_TOSHIBA is not set
255# CONFIG_I8K is not set 257# CONFIG_I8K is not set
256CONFIG_X86_REBOOTFIXUPS=y 258CONFIG_X86_REBOOTFIXUPS=y
257# CONFIG_MICROCODE is not set 259CONFIG_MICROCODE=y
260CONFIG_MICROCODE_OLD_INTERFACE=y
258CONFIG_X86_MSR=y 261CONFIG_X86_MSR=y
259CONFIG_X86_CPUID=y 262CONFIG_X86_CPUID=y
260# CONFIG_NOHIGHMEM is not set 263# CONFIG_NOHIGHMEM is not set
@@ -2115,7 +2118,7 @@ CONFIG_IO_DELAY_0X80=y
2115CONFIG_DEFAULT_IO_DELAY_TYPE=0 2118CONFIG_DEFAULT_IO_DELAY_TYPE=0
2116CONFIG_DEBUG_BOOT_PARAMS=y 2119CONFIG_DEBUG_BOOT_PARAMS=y
2117# CONFIG_CPA_DEBUG is not set 2120# CONFIG_CPA_DEBUG is not set
2118# CONFIG_OPTIMIZE_INLINING is not set 2121CONFIG_OPTIMIZE_INLINING=y
2119 2122
2120# 2123#
2121# Security options 2124# Security options
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 678c8acefe04..e620ea6e2a7a 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.27-rc4 3# Linux kernel version: 2.6.27-rc5
4# Mon Aug 25 14:40:46 2008 4# Wed Sep 3 17:13:39 2008
5# 5#
6CONFIG_64BIT=y 6CONFIG_64BIT=y
7# CONFIG_X86_32 is not set 7# CONFIG_X86_32 is not set
@@ -218,17 +218,14 @@ CONFIG_X86_PC=y
218# CONFIG_MVIAC3_2 is not set 218# CONFIG_MVIAC3_2 is not set
219# CONFIG_MVIAC7 is not set 219# CONFIG_MVIAC7 is not set
220# CONFIG_MPSC is not set 220# CONFIG_MPSC is not set
221CONFIG_MCORE2=y 221# CONFIG_MCORE2 is not set
222# CONFIG_GENERIC_CPU is not set 222CONFIG_GENERIC_CPU=y
223CONFIG_X86_CPU=y 223CONFIG_X86_CPU=y
224CONFIG_X86_L1_CACHE_BYTES=64 224CONFIG_X86_L1_CACHE_BYTES=128
225CONFIG_X86_INTERNODE_CACHE_BYTES=64 225CONFIG_X86_INTERNODE_CACHE_BYTES=128
226CONFIG_X86_CMPXCHG=y 226CONFIG_X86_CMPXCHG=y
227CONFIG_X86_L1_CACHE_SHIFT=6 227CONFIG_X86_L1_CACHE_SHIFT=7
228CONFIG_X86_WP_WORKS_OK=y 228CONFIG_X86_WP_WORKS_OK=y
229CONFIG_X86_INTEL_USERCOPY=y
230CONFIG_X86_USE_PPRO_CHECKSUM=y
231CONFIG_X86_P6_NOP=y
232CONFIG_X86_TSC=y 229CONFIG_X86_TSC=y
233CONFIG_X86_CMPXCHG64=y 230CONFIG_X86_CMPXCHG64=y
234CONFIG_X86_CMOV=y 231CONFIG_X86_CMOV=y
@@ -243,9 +240,8 @@ CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
243CONFIG_AMD_IOMMU=y 240CONFIG_AMD_IOMMU=y
244CONFIG_SWIOTLB=y 241CONFIG_SWIOTLB=y
245CONFIG_IOMMU_HELPER=y 242CONFIG_IOMMU_HELPER=y
246# CONFIG_MAXSMP is not set 243CONFIG_NR_CPUS=64
247CONFIG_NR_CPUS=4 244CONFIG_SCHED_SMT=y
248# CONFIG_SCHED_SMT is not set
249CONFIG_SCHED_MC=y 245CONFIG_SCHED_MC=y
250# CONFIG_PREEMPT_NONE is not set 246# CONFIG_PREEMPT_NONE is not set
251CONFIG_PREEMPT_VOLUNTARY=y 247CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +250,8 @@ CONFIG_X86_LOCAL_APIC=y
254CONFIG_X86_IO_APIC=y 250CONFIG_X86_IO_APIC=y
255# CONFIG_X86_MCE is not set 251# CONFIG_X86_MCE is not set
256# CONFIG_I8K is not set 252# CONFIG_I8K is not set
257# CONFIG_MICROCODE is not set 253CONFIG_MICROCODE=y
254CONFIG_MICROCODE_OLD_INTERFACE=y
258CONFIG_X86_MSR=y 255CONFIG_X86_MSR=y
259CONFIG_X86_CPUID=y 256CONFIG_X86_CPUID=y
260CONFIG_NUMA=y 257CONFIG_NUMA=y
@@ -290,7 +287,7 @@ CONFIG_BOUNCE=y
290CONFIG_VIRT_TO_BUS=y 287CONFIG_VIRT_TO_BUS=y
291CONFIG_MTRR=y 288CONFIG_MTRR=y
292# CONFIG_MTRR_SANITIZER is not set 289# CONFIG_MTRR_SANITIZER is not set
293# CONFIG_X86_PAT is not set 290CONFIG_X86_PAT=y
294CONFIG_EFI=y 291CONFIG_EFI=y
295CONFIG_SECCOMP=y 292CONFIG_SECCOMP=y
296# CONFIG_HZ_100 is not set 293# CONFIG_HZ_100 is not set
@@ -2089,7 +2086,7 @@ CONFIG_IO_DELAY_0X80=y
2089CONFIG_DEFAULT_IO_DELAY_TYPE=0 2086CONFIG_DEFAULT_IO_DELAY_TYPE=0
2090CONFIG_DEBUG_BOOT_PARAMS=y 2087CONFIG_DEBUG_BOOT_PARAMS=y
2091# CONFIG_CPA_DEBUG is not set 2088# CONFIG_CPA_DEBUG is not set
2092# CONFIG_OPTIMIZE_INLINING is not set 2089CONFIG_OPTIMIZE_INLINING=y
2093 2090
2094# 2091#
2095# Security options 2092# Security options
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a0e1dbe67dc1..127ec3f07214 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -85,8 +85,10 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
85 dump->regs.ax = regs->ax; 85 dump->regs.ax = regs->ax;
86 dump->regs.ds = current->thread.ds; 86 dump->regs.ds = current->thread.ds;
87 dump->regs.es = current->thread.es; 87 dump->regs.es = current->thread.es;
88 asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs; 88 savesegment(fs, fs);
89 asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; 89 dump->regs.fs = fs;
90 savesegment(gs, gs);
91 dump->regs.gs = gs;
90 dump->regs.orig_ax = regs->orig_ax; 92 dump->regs.orig_ax = regs->orig_ax;
91 dump->regs.ip = regs->ip; 93 dump->regs.ip = regs->ip;
92 dump->regs.cs = regs->cs; 94 dump->regs.cs = regs->cs;
@@ -430,8 +432,9 @@ beyond_if:
430 current->mm->start_stack = 432 current->mm->start_stack =
431 (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); 433 (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
432 /* start thread */ 434 /* start thread */
433 asm volatile("movl %0,%%fs" :: "r" (0)); \ 435 loadsegment(fs, 0);
434 asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); 436 loadsegment(ds, __USER32_DS);
437 loadsegment(es, __USER32_DS);
435 load_gs_index(0); 438 load_gs_index(0);
436 (regs)->ip = ex.a_entry; 439 (regs)->ip = ex.a_entry;
437 (regs)->sp = current->mm->start_stack; 440 (regs)->sp = current->mm->start_stack;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 20af4c79579a..f1a2ac777faf 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -206,7 +206,7 @@ struct rt_sigframe
206 { unsigned int cur; \ 206 { unsigned int cur; \
207 unsigned short pre; \ 207 unsigned short pre; \
208 err |= __get_user(pre, &sc->seg); \ 208 err |= __get_user(pre, &sc->seg); \
209 asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ 209 savesegment(seg, cur); \
210 pre |= mask; \ 210 pre |= mask; \
211 if (pre != cur) loadsegment(seg, pre); } 211 if (pre != cur) loadsegment(seg, pre); }
212 212
@@ -235,7 +235,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
235 */ 235 */
236 err |= __get_user(gs, &sc->gs); 236 err |= __get_user(gs, &sc->gs);
237 gs |= 3; 237 gs |= 3;
238 asm("movl %%gs,%0" : "=r" (oldgs)); 238 savesegment(gs, oldgs);
239 if (gs != oldgs) 239 if (gs != oldgs)
240 load_gs_index(gs); 240 load_gs_index(gs);
241 241
@@ -355,14 +355,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
355{ 355{
356 int tmp, err = 0; 356 int tmp, err = 0;
357 357
358 tmp = 0; 358 savesegment(gs, tmp);
359 __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
360 err |= __put_user(tmp, (unsigned int __user *)&sc->gs); 359 err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
361 __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); 360 savesegment(fs, tmp);
362 err |= __put_user(tmp, (unsigned int __user *)&sc->fs); 361 err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
363 __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp)); 362 savesegment(ds, tmp);
364 err |= __put_user(tmp, (unsigned int __user *)&sc->ds); 363 err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
365 __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp)); 364 savesegment(es, tmp);
366 err |= __put_user(tmp, (unsigned int __user *)&sc->es); 365 err |= __put_user(tmp, (unsigned int __user *)&sc->es);
367 366
368 err |= __put_user((u32)regs->di, &sc->di); 367 err |= __put_user((u32)regs->di, &sc->di);
@@ -498,8 +497,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
498 regs->dx = 0; 497 regs->dx = 0;
499 regs->cx = 0; 498 regs->cx = 0;
500 499
501 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 500 loadsegment(ds, __USER32_DS);
502 asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 501 loadsegment(es, __USER32_DS);
503 502
504 regs->cs = __USER32_CS; 503 regs->cs = __USER32_CS;
505 regs->ss = __USER32_DS; 504 regs->ss = __USER32_DS;
@@ -591,8 +590,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
591 regs->dx = (unsigned long) &frame->info; 590 regs->dx = (unsigned long) &frame->info;
592 regs->cx = (unsigned long) &frame->uc; 591 regs->cx = (unsigned long) &frame->uc;
593 592
594 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 593 loadsegment(ds, __USER32_DS);
595 asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 594 loadsegment(es, __USER32_DS);
596 595
597 regs->cs = __USER32_CS; 596 regs->cs = __USER32_CS;
598 regs->ss = __USER32_DS; 597 regs->ss = __USER32_DS;
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index d3c64088b981..beda4232ce69 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -556,15 +556,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
556 return ret; 556 return ret;
557} 557}
558 558
559/* These are here just in case some old ia32 binary calls it. */
560asmlinkage long sys32_pause(void)
561{
562 current->state = TASK_INTERRUPTIBLE;
563 schedule();
564 return -ERESTARTNOHAND;
565}
566
567
568#ifdef CONFIG_SYSCTL_SYSCALL 559#ifdef CONFIG_SYSCTL_SYSCALL
569struct sysctl_ia32 { 560struct sysctl_ia32 {
570 unsigned int name; 561 unsigned int name;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bfd10fd211cd..7d40ef7b36e3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled);
58#ifdef CONFIG_X86_64 58#ifdef CONFIG_X86_64
59 59
60#include <asm/proto.h> 60#include <asm/proto.h>
61#include <asm/genapic.h>
62 61
63#else /* X86 */ 62#else /* X86 */
64 63
@@ -97,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
97#warning ACPI uses CMPXCHG, i486 and later hardware 96#warning ACPI uses CMPXCHG, i486 and later hardware
98#endif 97#endif
99 98
100static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
101
102/* -------------------------------------------------------------------------- 99/* --------------------------------------------------------------------------
103 Boot-time Configuration 100 Boot-time Configuration
104 -------------------------------------------------------------------------- */ 101 -------------------------------------------------------------------------- */
@@ -160,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
160struct acpi_mcfg_allocation *pci_mmcfg_config; 157struct acpi_mcfg_allocation *pci_mmcfg_config;
161int pci_mmcfg_config_num; 158int pci_mmcfg_config_num;
162 159
160static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
161
163static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) 162static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
164{ 163{
165 if (!strcmp(mcfg->header.oem_id, "SGI")) 164 if (!strcmp(mcfg->header.oem_id, "SGI"))
@@ -1605,6 +1604,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1605 */ 1604 */
1606 { 1605 {
1607 .callback = dmi_ignore_irq0_timer_override, 1606 .callback = dmi_ignore_irq0_timer_override,
1607 .ident = "HP nx6115 laptop",
1608 .matches = {
1609 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1610 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"),
1611 },
1612 },
1613 {
1614 .callback = dmi_ignore_irq0_timer_override,
1608 .ident = "HP NX6125 laptop", 1615 .ident = "HP NX6125 laptop",
1609 .matches = { 1616 .matches = {
1610 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), 1617 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1619,6 +1626,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1619 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), 1626 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
1620 }, 1627 },
1621 }, 1628 },
1629 {
1630 .callback = dmi_ignore_irq0_timer_override,
1631 .ident = "HP 6715b laptop",
1632 .matches = {
1633 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1634 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
1635 },
1636 },
1622 {} 1637 {}
1623}; 1638};
1624 1639
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65a0c1b48696..fb04e49776ba 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -231,25 +231,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
231 continue; 231 continue;
232 if (*ptr > text_end) 232 if (*ptr > text_end)
233 continue; 233 continue;
234 text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ 234 /* turn DS segment override prefix into lock prefix */
235 text_poke(*ptr, ((unsigned char []){0xf0}), 1);
235 }; 236 };
236} 237}
237 238
238static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) 239static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
239{ 240{
240 u8 **ptr; 241 u8 **ptr;
241 char insn[1];
242 242
243 if (noreplace_smp) 243 if (noreplace_smp)
244 return; 244 return;
245 245
246 add_nops(insn, 1);
247 for (ptr = start; ptr < end; ptr++) { 246 for (ptr = start; ptr < end; ptr++) {
248 if (*ptr < text) 247 if (*ptr < text)
249 continue; 248 continue;
250 if (*ptr > text_end) 249 if (*ptr > text_end)
251 continue; 250 continue;
252 text_poke(*ptr, insn, 1); 251 /* turn lock prefix into DS segment override prefix */
252 text_poke(*ptr, ((unsigned char []){0x3E}), 1);
253 }; 253 };
254} 254}
255 255
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 3b346c6f5514..34e4d112b1ef 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -201,10 +201,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
201 */ 201 */
202static int iommu_completion_wait(struct amd_iommu *iommu) 202static int iommu_completion_wait(struct amd_iommu *iommu)
203{ 203{
204 int ret, ready = 0; 204 int ret = 0, ready = 0;
205 unsigned status = 0; 205 unsigned status = 0;
206 struct iommu_cmd cmd; 206 struct iommu_cmd cmd;
207 unsigned long i = 0; 207 unsigned long flags, i = 0;
208 208
209 memset(&cmd, 0, sizeof(cmd)); 209 memset(&cmd, 0, sizeof(cmd));
210 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 210 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
@@ -212,10 +212,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
212 212
213 iommu->need_sync = 0; 213 iommu->need_sync = 0;
214 214
215 ret = iommu_queue_command(iommu, &cmd); 215 spin_lock_irqsave(&iommu->lock, flags);
216
217 ret = __iommu_queue_command(iommu, &cmd);
216 218
217 if (ret) 219 if (ret)
218 return ret; 220 goto out;
219 221
220 while (!ready && (i < EXIT_LOOP_COUNT)) { 222 while (!ready && (i < EXIT_LOOP_COUNT)) {
221 ++i; 223 ++i;
@@ -230,6 +232,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
230 232
231 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) 233 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
232 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); 234 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
235out:
236 spin_unlock_irqrestore(&iommu->lock, flags);
233 237
234 return 0; 238 return 0;
235} 239}
@@ -240,6 +244,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
240static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) 244static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
241{ 245{
242 struct iommu_cmd cmd; 246 struct iommu_cmd cmd;
247 int ret;
243 248
244 BUG_ON(iommu == NULL); 249 BUG_ON(iommu == NULL);
245 250
@@ -247,9 +252,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
247 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); 252 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
248 cmd.data[0] = devid; 253 cmd.data[0] = devid;
249 254
255 ret = iommu_queue_command(iommu, &cmd);
256
250 iommu->need_sync = 1; 257 iommu->need_sync = 1;
251 258
252 return iommu_queue_command(iommu, &cmd); 259 return ret;
253} 260}
254 261
255/* 262/*
@@ -259,6 +266,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
259 u64 address, u16 domid, int pde, int s) 266 u64 address, u16 domid, int pde, int s)
260{ 267{
261 struct iommu_cmd cmd; 268 struct iommu_cmd cmd;
269 int ret;
262 270
263 memset(&cmd, 0, sizeof(cmd)); 271 memset(&cmd, 0, sizeof(cmd));
264 address &= PAGE_MASK; 272 address &= PAGE_MASK;
@@ -271,9 +279,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
271 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 279 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
272 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 280 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
273 281
282 ret = iommu_queue_command(iommu, &cmd);
283
274 iommu->need_sync = 1; 284 iommu->need_sync = 1;
275 285
276 return iommu_queue_command(iommu, &cmd); 286 return ret;
277} 287}
278 288
279/* 289/*
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 44e21826db11..9a32b37ee2ee 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -455,11 +455,11 @@ out:
455 force_iommu || 455 force_iommu ||
456 valid_agp || 456 valid_agp ||
457 fallback_aper_force) { 457 fallback_aper_force) {
458 printk(KERN_ERR 458 printk(KERN_INFO
459 "Your BIOS doesn't leave a aperture memory hole\n"); 459 "Your BIOS doesn't leave a aperture memory hole\n");
460 printk(KERN_ERR 460 printk(KERN_INFO
461 "Please enable the IOMMU option in the BIOS setup\n"); 461 "Please enable the IOMMU option in the BIOS setup\n");
462 printk(KERN_ERR 462 printk(KERN_INFO
463 "This costs you %d MB of RAM\n", 463 "This costs you %d MB of RAM\n",
464 32 << fallback_aper_order); 464 32 << fallback_aper_order);
465 465
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9ee24e6bc4b0..5145a6e72bbb 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,12 +228,12 @@
228#include <linux/suspend.h> 228#include <linux/suspend.h>
229#include <linux/kthread.h> 229#include <linux/kthread.h>
230#include <linux/jiffies.h> 230#include <linux/jiffies.h>
231#include <linux/smp_lock.h>
232 231
233#include <asm/system.h> 232#include <asm/system.h>
234#include <asm/uaccess.h> 233#include <asm/uaccess.h>
235#include <asm/desc.h> 234#include <asm/desc.h>
236#include <asm/i8253.h> 235#include <asm/i8253.h>
236#include <asm/olpc.h>
237#include <asm/paravirt.h> 237#include <asm/paravirt.h>
238#include <asm/reboot.h> 238#include <asm/reboot.h>
239 239
@@ -2217,7 +2217,7 @@ static int __init apm_init(void)
2217 2217
2218 dmi_check_system(apm_dmi_table); 2218 dmi_check_system(apm_dmi_table);
2219 2219
2220 if (apm_info.bios.version == 0 || paravirt_enabled()) { 2220 if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) {
2221 printk(KERN_INFO "apm: BIOS not found.\n"); 2221 printk(KERN_INFO "apm: BIOS not found.\n");
2222 return -ENODEV; 2222 return -ENODEV;
2223 } 2223 }
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387006fe..505543a75a56 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
22 22
23#define __NO_STUBS 1 23#define __NO_STUBS 1
24#undef __SYSCALL 24#undef __SYSCALL
25#undef _ASM_X86_64_UNISTD_H_ 25#undef ASM_X86__UNISTD_64_H
26#define __SYSCALL(nr, sym) [nr] = 1, 26#define __SYSCALL(nr, sym) [nr] = 1,
27static char syscalls[] = { 27static char syscalls[] = {
28#include <asm/unistd.h> 28#include <asm/unistd.h>
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index c639bd55391c..fdd585f9c53d 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,11 +25,11 @@ x86_bios_strerror(long status)
25{ 25{
26 const char *str; 26 const char *str;
27 switch (status) { 27 switch (status) {
28 case 0: str = "Call completed without error"; break; 28 case 0: str = "Call completed without error"; break;
29 case -1: str = "Not implemented"; break; 29 case -1: str = "Not implemented"; break;
30 case -2: str = "Invalid argument"; break; 30 case -2: str = "Invalid argument"; break;
31 case -3: str = "Call completed with error"; break; 31 case -3: str = "Call completed with error"; break;
32 default: str = "Unknown BIOS status code"; break; 32 default: str = "Unknown BIOS status code"; break;
33 } 33 }
34 return str; 34 return str;
35} 35}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8aab8517642e..4e456bd955bb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -344,31 +344,15 @@ static void __init early_cpu_detect(void)
344 344
345/* 345/*
346 * The NOPL instruction is supposed to exist on all CPUs with 346 * The NOPL instruction is supposed to exist on all CPUs with
347 * family >= 6, unfortunately, that's not true in practice because 347 * family >= 6; unfortunately, that's not true in practice because
348 * of early VIA chips and (more importantly) broken virtualizers that 348 * of early VIA chips and (more importantly) broken virtualizers that
349 * are not easy to detect. Hence, probe for it based on first 349 * are not easy to detect. In the latter case it doesn't even *fail*
350 * principles. 350 * reliably, so probing for it doesn't even work. Disable it completely
351 * unless we can find a reliable way to detect all the broken cases.
351 */ 352 */
352static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) 353static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
353{ 354{
354 const u32 nopl_signature = 0x888c53b1; /* Random number */
355 u32 has_nopl = nopl_signature;
356
357 clear_cpu_cap(c, X86_FEATURE_NOPL); 355 clear_cpu_cap(c, X86_FEATURE_NOPL);
358 if (c->x86 >= 6) {
359 asm volatile("\n"
360 "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
361 "2:\n"
362 " .section .fixup,\"ax\"\n"
363 "3: xor %0,%0\n"
364 " jmp 2b\n"
365 " .previous\n"
366 _ASM_EXTABLE(1b,3b)
367 : "+a" (has_nopl));
368
369 if (has_nopl == nopl_signature)
370 set_cpu_cap(c, X86_FEATURE_NOPL);
371 }
372} 356}
373 357
374static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 358static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index a11f5d4477cd..305b465889b0 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -430,6 +430,49 @@ static __init int setup_noclflush(char *arg)
430} 430}
431__setup("noclflush", setup_noclflush); 431__setup("noclflush", setup_noclflush);
432 432
433struct msr_range {
434 unsigned min;
435 unsigned max;
436};
437
438static struct msr_range msr_range_array[] __cpuinitdata = {
439 { 0x00000000, 0x00000418},
440 { 0xc0000000, 0xc000040b},
441 { 0xc0010000, 0xc0010142},
442 { 0xc0011000, 0xc001103b},
443};
444
445static void __cpuinit print_cpu_msr(void)
446{
447 unsigned index;
448 u64 val;
449 int i;
450 unsigned index_min, index_max;
451
452 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
453 index_min = msr_range_array[i].min;
454 index_max = msr_range_array[i].max;
455 for (index = index_min; index < index_max; index++) {
456 if (rdmsrl_amd_safe(index, &val))
457 continue;
458 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
459 }
460 }
461}
462
463static int show_msr __cpuinitdata;
464static __init int setup_show_msr(char *arg)
465{
466 int num;
467
468 get_option(&arg, &num);
469
470 if (num > 0)
471 show_msr = num;
472 return 1;
473}
474__setup("show_msr=", setup_show_msr);
475
433void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) 476void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
434{ 477{
435 if (c->x86_model_id[0]) 478 if (c->x86_model_id[0])
@@ -439,6 +482,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
439 printk(KERN_CONT " stepping %02x\n", c->x86_mask); 482 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
440 else 483 else
441 printk(KERN_CONT "\n"); 484 printk(KERN_CONT "\n");
485
486#ifdef CONFIG_SMP
487 if (c->cpu_index < show_msr)
488 print_cpu_msr();
489#else
490 if (show_msr)
491 print_cpu_msr();
492#endif
442} 493}
443 494
444static __init int setup_disablecpuid(char *arg) 495static __init int setup_disablecpuid(char *arg)
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index f1685fb91fbd..b8e05ee4f736 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
171 } 171 }
172 172
173 if (c->x86 != 0xF) { 173 if (c->x86 != 0xF) {
174 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n"); 174 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
175 return 0; 175 return 0;
176 } 176 }
177 177
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 15e13c01cc36..3b5f06423e77 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -26,7 +26,7 @@
26#include <asm/cpufeature.h> 26#include <asm/cpufeature.h>
27 27
28#define PFX "speedstep-centrino: " 28#define PFX "speedstep-centrino: "
29#define MAINTAINER "cpufreq@lists.linux.org.uk" 29#define MAINTAINER "cpufreq@vger.kernel.org"
30 30
31#define dprintk(msg...) \ 31#define dprintk(msg...) \
32 cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) 32 cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f2569b8f8..f113ef4595f6 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
222 set_cpu_cap(c, X86_FEATURE_BTS); 222 set_cpu_cap(c, X86_FEATURE_BTS);
223 if (!(l1 & (1<<12))) 223 if (!(l1 & (1<<12)))
224 set_cpu_cap(c, X86_FEATURE_PEBS); 224 set_cpu_cap(c, X86_FEATURE_PEBS);
225 ds_init_intel(c);
225 } 226 }
226 227
227 if (cpu_has_bts) 228 if (cpu_has_bts)
228 ds_init_intel(c); 229 ptrace_bts_init_intel(c);
229 230
230 /* 231 /*
231 * See if we have a good local APIC by checking for buggy Pentia, 232 * See if we have a good local APIC by checking for buggy Pentia,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index cb7d3b6a80eb..4e8d77f01eeb 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -401,12 +401,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
401 tmp |= ~((1<<(hi - 1)) - 1); 401 tmp |= ~((1<<(hi - 1)) - 1);
402 402
403 if (tmp != mask_lo) { 403 if (tmp != mask_lo) {
404 static int once = 1; 404 WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
405
406 if (once) {
407 printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
408 once = 0;
409 }
410 mask_lo = tmp; 405 mask_lo = tmp;
411 } 406 }
412 } 407 }
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 84c480bb3715..4c4214690dd1 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
405 } 405 }
406 /* RED-PEN: base can be > 32bit */ 406 /* RED-PEN: base can be > 32bit */
407 len += seq_printf(seq, 407 len += seq_printf(seq,
408 "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", 408 "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
409 i, base, base >> (20 - PAGE_SHIFT), size, factor, 409 i, base, base >> (20 - PAGE_SHIFT), size, factor,
410 mtrr_attrib_to_str(type), mtrr_usage_table[i]); 410 mtrr_usage_table[i], mtrr_attrib_to_str(type));
411 } 411 }
412 } 412 }
413 return 0; 413 return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b117d7f8a564..c78c04821ea1 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
729 mtrr_type type; 729 mtrr_type type;
730}; 730};
731 731
732struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; 732static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
733static int __initdata debug_print; 733static int __initdata debug_print;
734 734
735static int __init 735static int __init
@@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
759 /* take out UC ranges */ 759 /* take out UC ranges */
760 for (i = 0; i < num_var_ranges; i++) { 760 for (i = 0; i < num_var_ranges; i++) {
761 type = range_state[i].type; 761 type = range_state[i].type;
762 if (type != MTRR_TYPE_UNCACHABLE) 762 if (type != MTRR_TYPE_UNCACHABLE &&
763 type != MTRR_TYPE_WRPROT)
763 continue; 764 continue;
764 size = range_state[i].size_pfn; 765 size = range_state[i].size_pfn;
765 if (!size) 766 if (!size)
@@ -834,7 +835,14 @@ static int __init enable_mtrr_cleanup_setup(char *str)
834 enable_mtrr_cleanup = 1; 835 enable_mtrr_cleanup = 1;
835 return 0; 836 return 0;
836} 837}
837early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); 838early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
839
840static int __init mtrr_cleanup_debug_setup(char *str)
841{
842 debug_print = 1;
843 return 0;
844}
845early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
838 846
839struct var_mtrr_state { 847struct var_mtrr_state {
840 unsigned long range_startk; 848 unsigned long range_startk;
@@ -898,6 +906,27 @@ set_var_mtrr_all(unsigned int address_bits)
898 } 906 }
899} 907}
900 908
909static unsigned long to_size_factor(unsigned long sizek, char *factorp)
910{
911 char factor;
912 unsigned long base = sizek;
913
914 if (base & ((1<<10) - 1)) {
915 /* not MB alignment */
916 factor = 'K';
917 } else if (base & ((1<<20) - 1)){
918 factor = 'M';
919 base >>= 10;
920 } else {
921 factor = 'G';
922 base >>= 20;
923 }
924
925 *factorp = factor;
926
927 return base;
928}
929
901static unsigned int __init 930static unsigned int __init
902range_to_mtrr(unsigned int reg, unsigned long range_startk, 931range_to_mtrr(unsigned int reg, unsigned long range_startk,
903 unsigned long range_sizek, unsigned char type) 932 unsigned long range_sizek, unsigned char type)
@@ -919,13 +948,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
919 align = max_align; 948 align = max_align;
920 949
921 sizek = 1 << align; 950 sizek = 1 << align;
922 if (debug_print) 951 if (debug_print) {
952 char start_factor = 'K', size_factor = 'K';
953 unsigned long start_base, size_base;
954
955 start_base = to_size_factor(range_startk, &start_factor),
956 size_base = to_size_factor(sizek, &size_factor),
957
923 printk(KERN_DEBUG "Setting variable MTRR %d, " 958 printk(KERN_DEBUG "Setting variable MTRR %d, "
924 "base: %ldMB, range: %ldMB, type %s\n", 959 "base: %ld%cB, range: %ld%cB, type %s\n",
925 reg, range_startk >> 10, sizek >> 10, 960 reg, start_base, start_factor,
961 size_base, size_factor,
926 (type == MTRR_TYPE_UNCACHABLE)?"UC": 962 (type == MTRR_TYPE_UNCACHABLE)?"UC":
927 ((type == MTRR_TYPE_WRBACK)?"WB":"Other") 963 ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
928 ); 964 );
965 }
929 save_var_mtrr(reg++, range_startk, sizek, type); 966 save_var_mtrr(reg++, range_startk, sizek, type);
930 range_startk += sizek; 967 range_startk += sizek;
931 range_sizek -= sizek; 968 range_sizek -= sizek;
@@ -970,6 +1007,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
970 /* try to append some small hole */ 1007 /* try to append some small hole */
971 range0_basek = state->range_startk; 1008 range0_basek = state->range_startk;
972 range0_sizek = ALIGN(state->range_sizek, chunk_sizek); 1009 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
1010
1011 /* no increase */
973 if (range0_sizek == state->range_sizek) { 1012 if (range0_sizek == state->range_sizek) {
974 if (debug_print) 1013 if (debug_print)
975 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", 1014 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
@@ -980,13 +1019,40 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
980 return 0; 1019 return 0;
981 } 1020 }
982 1021
983 range0_sizek -= chunk_sizek; 1022 /* only cut back, when it is not the last */
984 if (range0_sizek && sizek) { 1023 if (sizek) {
985 while (range0_basek + range0_sizek > (basek + sizek)) { 1024 while (range0_basek + range0_sizek > (basek + sizek)) {
986 range0_sizek -= chunk_sizek; 1025 if (range0_sizek >= chunk_sizek)
987 if (!range0_sizek) 1026 range0_sizek -= chunk_sizek;
988 break; 1027 else
989 } 1028 range0_sizek = 0;
1029
1030 if (!range0_sizek)
1031 break;
1032 }
1033 }
1034
1035second_try:
1036 range_basek = range0_basek + range0_sizek;
1037
1038 /* one hole in the middle */
1039 if (range_basek > basek && range_basek <= (basek + sizek))
1040 second_sizek = range_basek - basek;
1041
1042 if (range0_sizek > state->range_sizek) {
1043
1044 /* one hole in middle or at end */
1045 hole_sizek = range0_sizek - state->range_sizek - second_sizek;
1046
1047 /* hole size should be less than half of range0 size */
1048 if (hole_sizek >= (range0_sizek >> 1) &&
1049 range0_sizek >= chunk_sizek) {
1050 range0_sizek -= chunk_sizek;
1051 second_sizek = 0;
1052 hole_sizek = 0;
1053
1054 goto second_try;
1055 }
990 } 1056 }
991 1057
992 if (range0_sizek) { 1058 if (range0_sizek) {
@@ -996,50 +1062,28 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
996 (range0_basek + range0_sizek)<<10); 1062 (range0_basek + range0_sizek)<<10);
997 state->reg = range_to_mtrr(state->reg, range0_basek, 1063 state->reg = range_to_mtrr(state->reg, range0_basek,
998 range0_sizek, MTRR_TYPE_WRBACK); 1064 range0_sizek, MTRR_TYPE_WRBACK);
999
1000 }
1001
1002 range_basek = range0_basek + range0_sizek;
1003 range_sizek = chunk_sizek;
1004
1005 if (range_basek + range_sizek > basek &&
1006 range_basek + range_sizek <= (basek + sizek)) {
1007 /* one hole */
1008 second_basek = basek;
1009 second_sizek = range_basek + range_sizek - basek;
1010 } 1065 }
1011 1066
1012 /* if last piece, only could one hole near end */ 1067 if (range0_sizek < state->range_sizek) {
1013 if ((second_basek || !basek) && 1068 /* need to handle left over */
1014 range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
1015 (chunk_sizek >> 1)) {
1016 /*
1017 * one hole in middle (second_sizek is 0) or at end
1018 * (second_sizek is 0 )
1019 */
1020 hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
1021 - second_sizek;
1022 hole_basek = range_basek + range_sizek - hole_sizek
1023 - second_sizek;
1024 } else {
1025 /* fallback for big hole, or several holes */
1026 range_sizek = state->range_sizek - range0_sizek; 1069 range_sizek = state->range_sizek - range0_sizek;
1027 second_basek = 0; 1070
1028 second_sizek = 0; 1071 if (debug_print)
1072 printk(KERN_DEBUG "range: %016lx - %016lx\n",
1073 range_basek<<10,
1074 (range_basek + range_sizek)<<10);
1075 state->reg = range_to_mtrr(state->reg, range_basek,
1076 range_sizek, MTRR_TYPE_WRBACK);
1029 } 1077 }
1030 1078
1031 if (debug_print)
1032 printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
1033 (range_basek + range_sizek)<<10);
1034 state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
1035 MTRR_TYPE_WRBACK);
1036 if (hole_sizek) { 1079 if (hole_sizek) {
1080 hole_basek = range_basek - hole_sizek - second_sizek;
1037 if (debug_print) 1081 if (debug_print)
1038 printk(KERN_DEBUG "hole: %016lx - %016lx\n", 1082 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
1039 hole_basek<<10, (hole_basek + hole_sizek)<<10); 1083 hole_basek<<10,
1040 state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, 1084 (hole_basek + hole_sizek)<<10);
1041 MTRR_TYPE_UNCACHABLE); 1085 state->reg = range_to_mtrr(state->reg, hole_basek,
1042 1086 hole_sizek, MTRR_TYPE_UNCACHABLE);
1043 } 1087 }
1044 1088
1045 return second_sizek; 1089 return second_sizek;
@@ -1154,11 +1198,11 @@ struct mtrr_cleanup_result {
1154}; 1198};
1155 1199
1156/* 1200/*
1157 * gran_size: 1M, 2M, ..., 2G 1201 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
1158 * chunk size: gran_size, ..., 4G 1202 * chunk size: gran_size, ..., 2G
1159 * so we need (2+13)*6 1203 * so we need (1+16)*8
1160 */ 1204 */
1161#define NUM_RESULT 90 1205#define NUM_RESULT 136
1162#define PSHIFT (PAGE_SHIFT - 10) 1206#define PSHIFT (PAGE_SHIFT - 10)
1163 1207
1164static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; 1208static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
@@ -1168,13 +1212,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1168static int __init mtrr_cleanup(unsigned address_bits) 1212static int __init mtrr_cleanup(unsigned address_bits)
1169{ 1213{
1170 unsigned long extra_remove_base, extra_remove_size; 1214 unsigned long extra_remove_base, extra_remove_size;
1171 unsigned long i, base, size, def, dummy; 1215 unsigned long base, size, def, dummy;
1172 mtrr_type type; 1216 mtrr_type type;
1173 int nr_range, nr_range_new; 1217 int nr_range, nr_range_new;
1174 u64 chunk_size, gran_size; 1218 u64 chunk_size, gran_size;
1175 unsigned long range_sums, range_sums_new; 1219 unsigned long range_sums, range_sums_new;
1176 int index_good; 1220 int index_good;
1177 int num_reg_good; 1221 int num_reg_good;
1222 int i;
1178 1223
1179 /* extra one for all 0 */ 1224 /* extra one for all 0 */
1180 int num[MTRR_NUM_TYPES + 1]; 1225 int num[MTRR_NUM_TYPES + 1];
@@ -1204,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits)
1204 continue; 1249 continue;
1205 if (!size) 1250 if (!size)
1206 type = MTRR_NUM_TYPES; 1251 type = MTRR_NUM_TYPES;
1252 if (type == MTRR_TYPE_WRPROT)
1253 type = MTRR_TYPE_UNCACHABLE;
1207 num[type]++; 1254 num[type]++;
1208 } 1255 }
1209 1256
@@ -1216,23 +1263,57 @@ static int __init mtrr_cleanup(unsigned address_bits)
1216 num_var_ranges - num[MTRR_NUM_TYPES]) 1263 num_var_ranges - num[MTRR_NUM_TYPES])
1217 return 0; 1264 return 0;
1218 1265
1266 /* print original var MTRRs at first, for debugging: */
1267 printk(KERN_DEBUG "original variable MTRRs\n");
1268 for (i = 0; i < num_var_ranges; i++) {
1269 char start_factor = 'K', size_factor = 'K';
1270 unsigned long start_base, size_base;
1271
1272 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
1273 if (!size_base)
1274 continue;
1275
1276 size_base = to_size_factor(size_base, &size_factor),
1277 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
1278 start_base = to_size_factor(start_base, &start_factor),
1279 type = range_state[i].type;
1280
1281 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
1282 i, start_base, start_factor,
1283 size_base, size_factor,
1284 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
1285 ((type == MTRR_TYPE_WRPROT) ? "WP" :
1286 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1287 );
1288 }
1289
1219 memset(range, 0, sizeof(range)); 1290 memset(range, 0, sizeof(range));
1220 extra_remove_size = 0; 1291 extra_remove_size = 0;
1221 if (mtrr_tom2) { 1292 extra_remove_base = 1 << (32 - PAGE_SHIFT);
1222 extra_remove_base = 1 << (32 - PAGE_SHIFT); 1293 if (mtrr_tom2)
1223 extra_remove_size = 1294 extra_remove_size =
1224 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; 1295 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1225 }
1226 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, 1296 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
1227 extra_remove_size); 1297 extra_remove_size);
1298 /*
1299 * [0, 1M) should always be coverred by var mtrr with WB
1300 * and fixed mtrrs should take effective before var mtrr for it
1301 */
1302 nr_range = add_range_with_merge(range, nr_range, 0,
1303 (1ULL<<(20 - PAGE_SHIFT)) - 1);
1304 /* sort the ranges */
1305 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
1306
1228 range_sums = sum_ranges(range, nr_range); 1307 range_sums = sum_ranges(range, nr_range);
1229 printk(KERN_INFO "total RAM coverred: %ldM\n", 1308 printk(KERN_INFO "total RAM coverred: %ldM\n",
1230 range_sums >> (20 - PAGE_SHIFT)); 1309 range_sums >> (20 - PAGE_SHIFT));
1231 1310
1232 if (mtrr_chunk_size && mtrr_gran_size) { 1311 if (mtrr_chunk_size && mtrr_gran_size) {
1233 int num_reg; 1312 int num_reg;
1313 char gran_factor, chunk_factor, lose_factor;
1314 unsigned long gran_base, chunk_base, lose_base;
1234 1315
1235 debug_print = 1; 1316 debug_print++;
1236 /* convert ranges to var ranges state */ 1317 /* convert ranges to var ranges state */
1237 num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, 1318 num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
1238 mtrr_gran_size); 1319 mtrr_gran_size);
@@ -1256,34 +1337,48 @@ static int __init mtrr_cleanup(unsigned address_bits)
1256 result[i].lose_cover_sizek = 1337 result[i].lose_cover_sizek =
1257 (range_sums - range_sums_new) << PSHIFT; 1338 (range_sums - range_sums_new) << PSHIFT;
1258 1339
1259 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", 1340 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1260 result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, 1341 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1261 result[i].chunk_sizek >> 10); 1342 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1262 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", 1343 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1344 result[i].bad?"*BAD*":" ",
1345 gran_base, gran_factor, chunk_base, chunk_factor);
1346 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1263 result[i].num_reg, result[i].bad?"-":"", 1347 result[i].num_reg, result[i].bad?"-":"",
1264 result[i].lose_cover_sizek >> 10); 1348 lose_base, lose_factor);
1265 if (!result[i].bad) { 1349 if (!result[i].bad) {
1266 set_var_mtrr_all(address_bits); 1350 set_var_mtrr_all(address_bits);
1267 return 1; 1351 return 1;
1268 } 1352 }
1269 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " 1353 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1270 "will find optimal one\n"); 1354 "will find optimal one\n");
1271 debug_print = 0; 1355 debug_print--;
1272 memset(result, 0, sizeof(result[0])); 1356 memset(result, 0, sizeof(result[0]));
1273 } 1357 }
1274 1358
1275 i = 0; 1359 i = 0;
1276 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); 1360 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1277 memset(result, 0, sizeof(result)); 1361 memset(result, 0, sizeof(result));
1278 for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { 1362 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
1279 for (chunk_size = gran_size; chunk_size < (1ULL<<33); 1363 char gran_factor;
1364 unsigned long gran_base;
1365
1366 if (debug_print)
1367 gran_base = to_size_factor(gran_size >> 10, &gran_factor);
1368
1369 for (chunk_size = gran_size; chunk_size < (1ULL<<32);
1280 chunk_size <<= 1) { 1370 chunk_size <<= 1) {
1281 int num_reg; 1371 int num_reg;
1282 1372
1283 if (debug_print) 1373 if (debug_print) {
1284 printk(KERN_INFO 1374 char chunk_factor;
1285 "\ngran_size: %lldM chunk_size_size: %lldM\n", 1375 unsigned long chunk_base;
1286 gran_size >> 20, chunk_size >> 20); 1376
1377 chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
1378 printk(KERN_INFO "\n");
1379 printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
1380 gran_base, gran_factor, chunk_base, chunk_factor);
1381 }
1287 if (i >= NUM_RESULT) 1382 if (i >= NUM_RESULT)
1288 continue; 1383 continue;
1289 1384
@@ -1326,12 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits)
1326 1421
1327 /* print out all */ 1422 /* print out all */
1328 for (i = 0; i < NUM_RESULT; i++) { 1423 for (i = 0; i < NUM_RESULT; i++) {
1329 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", 1424 char gran_factor, chunk_factor, lose_factor;
1330 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, 1425 unsigned long gran_base, chunk_base, lose_base;
1331 result[i].chunk_sizek >> 10); 1426
1332 printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", 1427 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1333 result[i].num_reg, result[i].bad?"-":"", 1428 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1334 result[i].lose_cover_sizek >> 10); 1429 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1430 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1431 result[i].bad?"*BAD*":" ",
1432 gran_base, gran_factor, chunk_base, chunk_factor);
1433 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1434 result[i].num_reg, result[i].bad?"-":"",
1435 lose_base, lose_factor);
1335 } 1436 }
1336 1437
1337 /* try to find the optimal index */ 1438 /* try to find the optimal index */
@@ -1339,10 +1440,8 @@ static int __init mtrr_cleanup(unsigned address_bits)
1339 nr_mtrr_spare_reg = num_var_ranges - 1; 1440 nr_mtrr_spare_reg = num_var_ranges - 1;
1340 num_reg_good = -1; 1441 num_reg_good = -1;
1341 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { 1442 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1342 if (!min_loss_pfn[i]) { 1443 if (!min_loss_pfn[i])
1343 num_reg_good = i; 1444 num_reg_good = i;
1344 break;
1345 }
1346 } 1445 }
1347 1446
1348 index_good = -1; 1447 index_good = -1;
@@ -1358,21 +1457,26 @@ static int __init mtrr_cleanup(unsigned address_bits)
1358 } 1457 }
1359 1458
1360 if (index_good != -1) { 1459 if (index_good != -1) {
1460 char gran_factor, chunk_factor, lose_factor;
1461 unsigned long gran_base, chunk_base, lose_base;
1462
1361 printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); 1463 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1362 i = index_good; 1464 i = index_good;
1363 printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", 1465 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1364 result[i].gran_sizek >> 10, 1466 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1365 result[i].chunk_sizek >> 10); 1467 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1366 printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", 1468 printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
1367 result[i].num_reg, 1469 gran_base, gran_factor, chunk_base, chunk_factor);
1368 result[i].lose_cover_sizek >> 10); 1470 printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
1471 result[i].num_reg, lose_base, lose_factor);
1369 /* convert ranges to var ranges state */ 1472 /* convert ranges to var ranges state */
1370 chunk_size = result[i].chunk_sizek; 1473 chunk_size = result[i].chunk_sizek;
1371 chunk_size <<= 10; 1474 chunk_size <<= 10;
1372 gran_size = result[i].gran_sizek; 1475 gran_size = result[i].gran_sizek;
1373 gran_size <<= 10; 1476 gran_size <<= 10;
1374 debug_print = 1; 1477 debug_print++;
1375 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); 1478 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1479 debug_print--;
1376 set_var_mtrr_all(address_bits); 1480 set_var_mtrr_all(address_bits);
1377 return 1; 1481 return 1;
1378 } 1482 }
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 05cc22dbd4ff..6bff382094f5 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz)
295 /* setup the timer */ 295 /* setup the timer */
296 wrmsr(evntsel_msr, evntsel, 0); 296 wrmsr(evntsel_msr, evntsel, 0);
297 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); 297 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301 298
299 /* initialize the wd struct before enabling */
302 wd->perfctr_msr = perfctr_msr; 300 wd->perfctr_msr = perfctr_msr;
303 wd->evntsel_msr = evntsel_msr; 301 wd->evntsel_msr = evntsel_msr;
304 wd->cccr_msr = 0; /* unused */ 302 wd->cccr_msr = 0; /* unused */
303
304 /* ok, everything is initialized, announce that we're set */
305 cpu_nmi_set_wd_enabled();
306
307 apic_write(APIC_LVTPC, APIC_DM_NMI);
308 evntsel |= K7_EVNTSEL_ENABLE;
309 wrmsr(evntsel_msr, evntsel, 0);
310
305 return 1; 311 return 1;
306} 312}
307 313
@@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz)
379 wrmsr(evntsel_msr, evntsel, 0); 385 wrmsr(evntsel_msr, evntsel, 0);
380 nmi_hz = adjust_for_32bit_ctr(nmi_hz); 386 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
381 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); 387 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
382 apic_write(APIC_LVTPC, APIC_DM_NMI);
383 evntsel |= P6_EVNTSEL0_ENABLE;
384 wrmsr(evntsel_msr, evntsel, 0);
385 388
389 /* initialize the wd struct before enabling */
386 wd->perfctr_msr = perfctr_msr; 390 wd->perfctr_msr = perfctr_msr;
387 wd->evntsel_msr = evntsel_msr; 391 wd->evntsel_msr = evntsel_msr;
388 wd->cccr_msr = 0; /* unused */ 392 wd->cccr_msr = 0; /* unused */
393
394 /* ok, everything is initialized, announce that we're set */
395 cpu_nmi_set_wd_enabled();
396
397 apic_write(APIC_LVTPC, APIC_DM_NMI);
398 evntsel |= P6_EVNTSEL0_ENABLE;
399 wrmsr(evntsel_msr, evntsel, 0);
400
389 return 1; 401 return 1;
390} 402}
391 403
@@ -432,6 +444,27 @@ static const struct wd_ops p6_wd_ops = {
432#define P4_CCCR_ENABLE (1 << 12) 444#define P4_CCCR_ENABLE (1 << 12)
433#define P4_CCCR_OVF (1 << 31) 445#define P4_CCCR_OVF (1 << 31)
434 446
447#define P4_CONTROLS 18
448static unsigned int p4_controls[18] = {
449 MSR_P4_BPU_CCCR0,
450 MSR_P4_BPU_CCCR1,
451 MSR_P4_BPU_CCCR2,
452 MSR_P4_BPU_CCCR3,
453 MSR_P4_MS_CCCR0,
454 MSR_P4_MS_CCCR1,
455 MSR_P4_MS_CCCR2,
456 MSR_P4_MS_CCCR3,
457 MSR_P4_FLAME_CCCR0,
458 MSR_P4_FLAME_CCCR1,
459 MSR_P4_FLAME_CCCR2,
460 MSR_P4_FLAME_CCCR3,
461 MSR_P4_IQ_CCCR0,
462 MSR_P4_IQ_CCCR1,
463 MSR_P4_IQ_CCCR2,
464 MSR_P4_IQ_CCCR3,
465 MSR_P4_IQ_CCCR4,
466 MSR_P4_IQ_CCCR5,
467};
435/* 468/*
436 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter 469 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
437 * CRU_ESCR0 (with any non-null event selector) through a complemented 470 * CRU_ESCR0 (with any non-null event selector) through a complemented
@@ -473,6 +506,26 @@ static int setup_p4_watchdog(unsigned nmi_hz)
473 evntsel_msr = MSR_P4_CRU_ESCR0; 506 evntsel_msr = MSR_P4_CRU_ESCR0;
474 cccr_msr = MSR_P4_IQ_CCCR0; 507 cccr_msr = MSR_P4_IQ_CCCR0;
475 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); 508 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
509
510 /*
511 * If we're on the kdump kernel or other situation, we may
512 * still have other performance counter registers set to
513 * interrupt and they'll keep interrupting forever because
514 * of the P4_CCCR_OVF quirk. So we need to ACK all the
515 * pending interrupts and disable all the registers here,
516 * before reenabling the NMI delivery. Refer to p4_rearm()
517 * about the P4_CCCR_OVF quirk.
518 */
519 if (reset_devices) {
520 unsigned int low, high;
521 int i;
522
523 for (i = 0; i < P4_CONTROLS; i++) {
524 rdmsr(p4_controls[i], low, high);
525 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
526 wrmsr(p4_controls[i], low, high);
527 }
528 }
476 } else { 529 } else {
477 /* logical cpu 1 */ 530 /* logical cpu 1 */
478 perfctr_msr = MSR_P4_IQ_PERFCTR1; 531 perfctr_msr = MSR_P4_IQ_PERFCTR1;
@@ -499,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz)
499 wrmsr(evntsel_msr, evntsel, 0); 552 wrmsr(evntsel_msr, evntsel, 0);
500 wrmsr(cccr_msr, cccr_val, 0); 553 wrmsr(cccr_msr, cccr_val, 0);
501 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); 554 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
502 apic_write(APIC_LVTPC, APIC_DM_NMI); 555
503 cccr_val |= P4_CCCR_ENABLE;
504 wrmsr(cccr_msr, cccr_val, 0);
505 wd->perfctr_msr = perfctr_msr; 556 wd->perfctr_msr = perfctr_msr;
506 wd->evntsel_msr = evntsel_msr; 557 wd->evntsel_msr = evntsel_msr;
507 wd->cccr_msr = cccr_msr; 558 wd->cccr_msr = cccr_msr;
559
560 /* ok, everything is initialized, announce that we're set */
561 cpu_nmi_set_wd_enabled();
562
563 apic_write(APIC_LVTPC, APIC_DM_NMI);
564 cccr_val |= P4_CCCR_ENABLE;
565 wrmsr(cccr_msr, cccr_val, 0);
508 return 1; 566 return 1;
509} 567}
510 568
@@ -620,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
620 wrmsr(evntsel_msr, evntsel, 0); 678 wrmsr(evntsel_msr, evntsel, 0);
621 nmi_hz = adjust_for_32bit_ctr(nmi_hz); 679 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
622 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); 680 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
623 apic_write(APIC_LVTPC, APIC_DM_NMI);
624 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
625 wrmsr(evntsel_msr, evntsel, 0);
626 681
627 wd->perfctr_msr = perfctr_msr; 682 wd->perfctr_msr = perfctr_msr;
628 wd->evntsel_msr = evntsel_msr; 683 wd->evntsel_msr = evntsel_msr;
629 wd->cccr_msr = 0; /* unused */ 684 wd->cccr_msr = 0; /* unused */
685
686 /* ok, everything is initialized, announce that we're set */
687 cpu_nmi_set_wd_enabled();
688
689 apic_write(APIC_LVTPC, APIC_DM_NMI);
690 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
691 wrmsr(evntsel_msr, evntsel, 0);
630 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 692 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
631 return 1; 693 return 1;
632} 694}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 8e9cd6a8ec12..6a44d6465991 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -36,7 +36,6 @@
36#include <linux/smp_lock.h> 36#include <linux/smp_lock.h>
37#include <linux/major.h> 37#include <linux/major.h>
38#include <linux/fs.h> 38#include <linux/fs.h>
39#include <linux/smp_lock.h>
40#include <linux/device.h> 39#include <linux/device.h>
41#include <linux/cpu.h> 40#include <linux/cpu.h>
42#include <linux/notifier.h> 41#include <linux/notifier.h>
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 15e6c6bc4a46..e90a60ef10c2 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -7,9 +7,8 @@
7 7
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/crash_dump.h> 9#include <linux/crash_dump.h>
10 10#include <linux/uaccess.h>
11#include <asm/uaccess.h> 11#include <linux/io.h>
12#include <asm/io.h>
13 12
14/** 13/**
15 * copy_oldmem_page - copy one page from "oldmem" 14 * copy_oldmem_page - copy one page from "oldmem"
@@ -25,7 +24,7 @@
25 * in the current kernel. We stitch up a pte, similar to kmap_atomic. 24 * in the current kernel. We stitch up a pte, similar to kmap_atomic.
26 */ 25 */
27ssize_t copy_oldmem_page(unsigned long pfn, char *buf, 26ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
28 size_t csize, unsigned long offset, int userbuf) 27 size_t csize, unsigned long offset, int userbuf)
29{ 28{
30 void *vaddr; 29 void *vaddr;
31 30
@@ -33,14 +32,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
33 return 0; 32 return 0;
34 33
35 vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); 34 vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
35 if (!vaddr)
36 return -ENOMEM;
36 37
37 if (userbuf) { 38 if (userbuf) {
38 if (copy_to_user(buf, (vaddr + offset), csize)) { 39 if (copy_to_user(buf, vaddr + offset, csize)) {
39 iounmap(vaddr); 40 iounmap(vaddr);
40 return -EFAULT; 41 return -EFAULT;
41 } 42 }
42 } else 43 } else
43 memcpy(buf, (vaddr + offset), csize); 44 memcpy(buf, vaddr + offset, csize);
44 45
45 iounmap(vaddr); 46 iounmap(vaddr);
46 return csize; 47 return csize;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48d..2b69994fd3a8 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,49 @@
2 * Debug Store support 2 * Debug Store support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
23
24#ifdef CONFIG_X86_DS
25
20#include <asm/ds.h> 26#include <asm/ds.h>
21 27
22#include <linux/errno.h> 28#include <linux/errno.h>
23#include <linux/string.h> 29#include <linux/string.h>
24#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/sched.h>
32#include <linux/mm.h>
33
34
35/*
36 * The configuration for a particular DS hardware implementation.
37 */
38struct ds_configuration {
39 /* the size of the DS structure in bytes */
40 unsigned char sizeof_ds;
41 /* the size of one pointer-typed field in the DS structure in bytes;
42 this covers the first 8 fields related to buffer management. */
43 unsigned char sizeof_field;
44 /* the size of a BTS/PEBS record in bytes */
45 unsigned char sizeof_rec[2];
46};
47static struct ds_configuration ds_cfg;
25 48
26 49
27/* 50/*
@@ -44,378 +67,747 @@
44 * (interrupt occurs when write pointer passes interrupt pointer) 67 * (interrupt occurs when write pointer passes interrupt pointer)
45 * - value to which counter is reset following counter overflow 68 * - value to which counter is reset following counter overflow
46 * 69 *
47 * On later architectures, the last branch recording hardware uses 70 * Later architectures use 64bit pointers throughout, whereas earlier
48 * 64bit pointers even in 32bit mode. 71 * architectures use 32bit pointers in 32bit mode.
49 *
50 *
51 * Branch Trace Store (BTS) records store information about control
52 * flow changes. They at least provide the following information:
53 * - source linear address
54 * - destination linear address
55 * 72 *
56 * Netburst supported a predicated bit that had been dropped in later
57 * architectures. We do not suppor it.
58 * 73 *
74 * We compute the base address for the first 8 fields based on:
75 * - the field size stored in the DS configuration
76 * - the relative field position
77 * - an offset giving the start of the respective region
59 * 78 *
60 * In order to abstract from the actual DS and BTS layout, we describe 79 * This offset is further used to index various arrays holding
61 * the access to the relevant fields. 80 * information for BTS and PEBS at the respective index.
62 * Thanks to Andi Kleen for proposing this design.
63 * 81 *
64 * The implementation, however, is not as general as it might seem. In 82 * On later 32bit processors, we only access the lower 32bit of the
65 * order to stay somewhat simple and efficient, we assume an 83 * 64bit pointer fields. The upper halves will be zeroed out.
66 * underlying unsigned type (mostly a pointer type) and we expect the
67 * field to be at least as big as that type.
68 */ 84 */
69 85
70/* 86enum ds_field {
71 * A special from_ip address to indicate that the BTS record is an 87 ds_buffer_base = 0,
72 * info record that needs to be interpreted or skipped. 88 ds_index,
73 */ 89 ds_absolute_maximum,
74#define BTS_ESCAPE_ADDRESS (-1) 90 ds_interrupt_threshold,
91};
75 92
76/* 93enum ds_qualifier {
77 * A field access descriptor 94 ds_bts = 0,
78 */ 95 ds_pebs
79struct access_desc {
80 unsigned char offset;
81 unsigned char size;
82}; 96};
83 97
98static inline unsigned long ds_get(const unsigned char *base,
99 enum ds_qualifier qual, enum ds_field field)
100{
101 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
102 return *(unsigned long *)base;
103}
104
105static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
106 enum ds_field field, unsigned long value)
107{
108 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
109 (*(unsigned long *)base) = value;
110}
111
112
84/* 113/*
85 * The configuration for a particular DS/BTS hardware implementation. 114 * Locking is done only for allocating BTS or PEBS resources and for
115 * guarding context and buffer memory allocation.
116 *
117 * Most functions require the current task to own the ds context part
118 * they are going to access. All the locking is done when validating
119 * access to the context.
86 */ 120 */
87struct ds_configuration { 121static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
88 /* the DS configuration */
89 unsigned char sizeof_ds;
90 struct access_desc bts_buffer_base;
91 struct access_desc bts_index;
92 struct access_desc bts_absolute_maximum;
93 struct access_desc bts_interrupt_threshold;
94 /* the BTS configuration */
95 unsigned char sizeof_bts;
96 struct access_desc from_ip;
97 struct access_desc to_ip;
98 /* BTS variants used to store additional information like
99 timestamps */
100 struct access_desc info_type;
101 struct access_desc info_data;
102 unsigned long debugctl_mask;
103};
104 122
105/* 123/*
106 * The global configuration used by the below accessor functions 124 * Validate that the current task is allowed to access the BTS/PEBS
125 * buffer of the parameter task.
126 *
127 * Returns 0, if access is granted; -Eerrno, otherwise.
107 */ 128 */
108static struct ds_configuration ds_cfg; 129static inline int ds_validate_access(struct ds_context *context,
130 enum ds_qualifier qual)
131{
132 if (!context)
133 return -EPERM;
134
135 if (context->owner[qual] == current)
136 return 0;
137
138 return -EPERM;
139}
140
109 141
110/* 142/*
111 * Accessor functions for some DS and BTS fields using the above 143 * We either support (system-wide) per-cpu or per-thread allocation.
112 * global ptrace_bts_cfg. 144 * We distinguish the two based on the task_struct pointer, where a
145 * NULL pointer indicates per-cpu allocation for the current cpu.
146 *
147 * Allocations are use-counted. As soon as resources are allocated,
148 * further allocations must be of the same type (per-cpu or
149 * per-thread). We model this by counting allocations (i.e. the number
150 * of tracers of a certain type) for one type negatively:
151 * =0 no tracers
152 * >0 number of per-thread tracers
153 * <0 number of per-cpu tracers
154 *
155 * The below functions to get and put tracers and to check the
156 * allocation type require the ds_lock to be held by the caller.
157 *
158 * Tracers essentially gives the number of ds contexts for a certain
159 * type of allocation.
113 */ 160 */
114static inline unsigned long get_bts_buffer_base(char *base) 161static long tracers;
162
163static inline void get_tracer(struct task_struct *task)
115{ 164{
116 return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); 165 tracers += (task ? 1 : -1);
117} 166}
118static inline void set_bts_buffer_base(char *base, unsigned long value) 167
168static inline void put_tracer(struct task_struct *task)
119{ 169{
120 (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; 170 tracers -= (task ? 1 : -1);
121} 171}
122static inline unsigned long get_bts_index(char *base) 172
173static inline int check_tracer(struct task_struct *task)
123{ 174{
124 return *(unsigned long *)(base + ds_cfg.bts_index.offset); 175 return (task ? (tracers >= 0) : (tracers <= 0));
125} 176}
126static inline void set_bts_index(char *base, unsigned long value) 177
178
179/*
180 * The DS context is either attached to a thread or to a cpu:
181 * - in the former case, the thread_struct contains a pointer to the
182 * attached context.
183 * - in the latter case, we use a static array of per-cpu context
184 * pointers.
185 *
186 * Contexts are use-counted. They are allocated on first access and
187 * deallocated when the last user puts the context.
188 *
189 * We distinguish between an allocating and a non-allocating get of a
190 * context:
191 * - the allocating get is used for requesting BTS/PEBS resources. It
192 * requires the caller to hold the global ds_lock.
193 * - the non-allocating get is used for all other cases. A
194 * non-existing context indicates an error. It acquires and releases
195 * the ds_lock itself for obtaining the context.
196 *
197 * A context and its DS configuration are allocated and deallocated
198 * together. A context always has a DS configuration of the
199 * appropriate size.
200 */
201static DEFINE_PER_CPU(struct ds_context *, system_context);
202
203#define this_system_context per_cpu(system_context, smp_processor_id())
204
205/*
206 * Returns the pointer to the parameter task's context or to the
207 * system-wide context, if task is NULL.
208 *
209 * Increases the use count of the returned context, if not NULL.
210 */
211static inline struct ds_context *ds_get_context(struct task_struct *task)
127{ 212{
128 (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; 213 struct ds_context *context;
214
215 spin_lock(&ds_lock);
216
217 context = (task ? task->thread.ds_ctx : this_system_context);
218 if (context)
219 context->count++;
220
221 spin_unlock(&ds_lock);
222
223 return context;
129} 224}
130static inline unsigned long get_bts_absolute_maximum(char *base) 225
226/*
227 * Same as ds_get_context, but allocates the context and it's DS
228 * structure, if necessary; returns NULL; if out of memory.
229 *
230 * pre: requires ds_lock to be held
231 */
232static inline struct ds_context *ds_alloc_context(struct task_struct *task)
131{ 233{
132 return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); 234 struct ds_context **p_context =
235 (task ? &task->thread.ds_ctx : &this_system_context);
236 struct ds_context *context = *p_context;
237
238 if (!context) {
239 context = kzalloc(sizeof(*context), GFP_KERNEL);
240
241 if (!context)
242 return NULL;
243
244 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
245 if (!context->ds) {
246 kfree(context);
247 return NULL;
248 }
249
250 *p_context = context;
251
252 context->this = p_context;
253 context->task = task;
254
255 if (task)
256 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
257
258 if (!task || (task == current))
259 wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
260
261 get_tracer(task);
262 }
263
264 context->count++;
265
266 return context;
133} 267}
134static inline void set_bts_absolute_maximum(char *base, unsigned long value) 268
269/*
270 * Decreases the use count of the parameter context, if not NULL.
271 * Deallocates the context, if the use count reaches zero.
272 */
273static inline void ds_put_context(struct ds_context *context)
135{ 274{
136 (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; 275 if (!context)
276 return;
277
278 spin_lock(&ds_lock);
279
280 if (--context->count)
281 goto out;
282
283 *(context->this) = NULL;
284
285 if (context->task)
286 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
287
288 if (!context->task || (context->task == current))
289 wrmsrl(MSR_IA32_DS_AREA, 0);
290
291 put_tracer(context->task);
292
293 /* free any leftover buffers from tracers that did not
294 * deallocate them properly. */
295 kfree(context->buffer[ds_bts]);
296 kfree(context->buffer[ds_pebs]);
297 kfree(context->ds);
298 kfree(context);
299 out:
300 spin_unlock(&ds_lock);
137} 301}
138static inline unsigned long get_bts_interrupt_threshold(char *base) 302
303
304/*
305 * Handle a buffer overflow
306 *
307 * task: the task whose buffers are overflowing;
308 * NULL for a buffer overflow on the current cpu
309 * context: the ds context
310 * qual: the buffer type
311 */
312static void ds_overflow(struct task_struct *task, struct ds_context *context,
313 enum ds_qualifier qual)
139{ 314{
140 return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); 315 if (!context)
316 return;
317
318 if (context->callback[qual])
319 (*context->callback[qual])(task);
320
321 /* todo: do some more overflow handling */
141} 322}
142static inline void set_bts_interrupt_threshold(char *base, unsigned long value) 323
324
325/*
326 * Allocate a non-pageable buffer of the parameter size.
327 * Checks the memory and the locked memory rlimit.
328 *
329 * Returns the buffer, if successful;
330 * NULL, if out of memory or rlimit exceeded.
331 *
332 * size: the requested buffer size in bytes
333 * pages (out): if not NULL, contains the number of pages reserved
334 */
335static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
143{ 336{
144 (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; 337 unsigned long rlim, vm, pgsz;
338 void *buffer;
339
340 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
341
342 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
343 vm = current->mm->total_vm + pgsz;
344 if (rlim < vm)
345 return NULL;
346
347 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
348 vm = current->mm->locked_vm + pgsz;
349 if (rlim < vm)
350 return NULL;
351
352 buffer = kzalloc(size, GFP_KERNEL);
353 if (!buffer)
354 return NULL;
355
356 current->mm->total_vm += pgsz;
357 current->mm->locked_vm += pgsz;
358
359 if (pages)
360 *pages = pgsz;
361
362 return buffer;
145} 363}
146static inline unsigned long get_from_ip(char *base) 364
365static int ds_request(struct task_struct *task, void *base, size_t size,
366 ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
147{ 367{
148 return *(unsigned long *)(base + ds_cfg.from_ip.offset); 368 struct ds_context *context;
369 unsigned long buffer, adj;
370 const unsigned long alignment = (1 << 3);
371 int error = 0;
372
373 if (!ds_cfg.sizeof_ds)
374 return -EOPNOTSUPP;
375
376 /* we require some space to do alignment adjustments below */
377 if (size < (alignment + ds_cfg.sizeof_rec[qual]))
378 return -EINVAL;
379
380 /* buffer overflow notification is not yet implemented */
381 if (ovfl)
382 return -EOPNOTSUPP;
383
384
385 spin_lock(&ds_lock);
386
387 if (!check_tracer(task))
388 return -EPERM;
389
390 error = -ENOMEM;
391 context = ds_alloc_context(task);
392 if (!context)
393 goto out_unlock;
394
395 error = -EALREADY;
396 if (context->owner[qual] == current)
397 goto out_unlock;
398 error = -EPERM;
399 if (context->owner[qual] != NULL)
400 goto out_unlock;
401 context->owner[qual] = current;
402
403 spin_unlock(&ds_lock);
404
405
406 error = -ENOMEM;
407 if (!base) {
408 base = ds_allocate_buffer(size, &context->pages[qual]);
409 if (!base)
410 goto out_release;
411
412 context->buffer[qual] = base;
413 }
414 error = 0;
415
416 context->callback[qual] = ovfl;
417
418 /* adjust the buffer address and size to meet alignment
419 * constraints:
420 * - buffer is double-word aligned
421 * - size is multiple of record size
422 *
423 * We checked the size at the very beginning; we have enough
424 * space to do the adjustment.
425 */
426 buffer = (unsigned long)base;
427
428 adj = ALIGN(buffer, alignment) - buffer;
429 buffer += adj;
430 size -= adj;
431
432 size /= ds_cfg.sizeof_rec[qual];
433 size *= ds_cfg.sizeof_rec[qual];
434
435 ds_set(context->ds, qual, ds_buffer_base, buffer);
436 ds_set(context->ds, qual, ds_index, buffer);
437 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
438
439 if (ovfl) {
440 /* todo: select a suitable interrupt threshold */
441 } else
442 ds_set(context->ds, qual,
443 ds_interrupt_threshold, buffer + size + 1);
444
445 /* we keep the context until ds_release */
446 return error;
447
448 out_release:
449 context->owner[qual] = NULL;
450 ds_put_context(context);
451 return error;
452
453 out_unlock:
454 spin_unlock(&ds_lock);
455 ds_put_context(context);
456 return error;
149} 457}
150static inline void set_from_ip(char *base, unsigned long value) 458
459int ds_request_bts(struct task_struct *task, void *base, size_t size,
460 ds_ovfl_callback_t ovfl)
151{ 461{
152 (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; 462 return ds_request(task, base, size, ovfl, ds_bts);
153} 463}
154static inline unsigned long get_to_ip(char *base) 464
465int ds_request_pebs(struct task_struct *task, void *base, size_t size,
466 ds_ovfl_callback_t ovfl)
155{ 467{
156 return *(unsigned long *)(base + ds_cfg.to_ip.offset); 468 return ds_request(task, base, size, ovfl, ds_pebs);
157} 469}
158static inline void set_to_ip(char *base, unsigned long value) 470
471static int ds_release(struct task_struct *task, enum ds_qualifier qual)
159{ 472{
160 (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; 473 struct ds_context *context;
474 int error;
475
476 context = ds_get_context(task);
477 error = ds_validate_access(context, qual);
478 if (error < 0)
479 goto out;
480
481 kfree(context->buffer[qual]);
482 context->buffer[qual] = NULL;
483
484 current->mm->total_vm -= context->pages[qual];
485 current->mm->locked_vm -= context->pages[qual];
486 context->pages[qual] = 0;
487 context->owner[qual] = NULL;
488
489 /*
490 * we put the context twice:
491 * once for the ds_get_context
492 * once for the corresponding ds_request
493 */
494 ds_put_context(context);
495 out:
496 ds_put_context(context);
497 return error;
161} 498}
162static inline unsigned char get_info_type(char *base) 499
500int ds_release_bts(struct task_struct *task)
163{ 501{
164 return *(unsigned char *)(base + ds_cfg.info_type.offset); 502 return ds_release(task, ds_bts);
165} 503}
166static inline void set_info_type(char *base, unsigned char value) 504
505int ds_release_pebs(struct task_struct *task)
167{ 506{
168 (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; 507 return ds_release(task, ds_pebs);
169} 508}
170static inline unsigned long get_info_data(char *base) 509
510static int ds_get_index(struct task_struct *task, size_t *pos,
511 enum ds_qualifier qual)
171{ 512{
172 return *(unsigned long *)(base + ds_cfg.info_data.offset); 513 struct ds_context *context;
514 unsigned long base, index;
515 int error;
516
517 context = ds_get_context(task);
518 error = ds_validate_access(context, qual);
519 if (error < 0)
520 goto out;
521
522 base = ds_get(context->ds, qual, ds_buffer_base);
523 index = ds_get(context->ds, qual, ds_index);
524
525 error = ((index - base) / ds_cfg.sizeof_rec[qual]);
526 if (pos)
527 *pos = error;
528 out:
529 ds_put_context(context);
530 return error;
173} 531}
174static inline void set_info_data(char *base, unsigned long value) 532
533int ds_get_bts_index(struct task_struct *task, size_t *pos)
175{ 534{
176 (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; 535 return ds_get_index(task, pos, ds_bts);
177} 536}
178 537
538int ds_get_pebs_index(struct task_struct *task, size_t *pos)
539{
540 return ds_get_index(task, pos, ds_pebs);
541}
179 542
180int ds_allocate(void **dsp, size_t bts_size_in_bytes) 543static int ds_get_end(struct task_struct *task, size_t *pos,
544 enum ds_qualifier qual)
181{ 545{
182 size_t bts_size_in_records; 546 struct ds_context *context;
183 unsigned long bts; 547 unsigned long base, end;
184 void *ds; 548 int error;
549
550 context = ds_get_context(task);
551 error = ds_validate_access(context, qual);
552 if (error < 0)
553 goto out;
554
555 base = ds_get(context->ds, qual, ds_buffer_base);
556 end = ds_get(context->ds, qual, ds_absolute_maximum);
557
558 error = ((end - base) / ds_cfg.sizeof_rec[qual]);
559 if (pos)
560 *pos = error;
561 out:
562 ds_put_context(context);
563 return error;
564}
185 565
186 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 566int ds_get_bts_end(struct task_struct *task, size_t *pos)
187 return -EOPNOTSUPP; 567{
568 return ds_get_end(task, pos, ds_bts);
569}
188 570
189 if (bts_size_in_bytes < 0) 571int ds_get_pebs_end(struct task_struct *task, size_t *pos)
190 return -EINVAL; 572{
573 return ds_get_end(task, pos, ds_pebs);
574}
191 575
192 bts_size_in_records = 576static int ds_access(struct task_struct *task, size_t index,
193 bts_size_in_bytes / ds_cfg.sizeof_bts; 577 const void **record, enum ds_qualifier qual)
194 bts_size_in_bytes = 578{
195 bts_size_in_records * ds_cfg.sizeof_bts; 579 struct ds_context *context;
580 unsigned long base, idx;
581 int error;
196 582
197 if (bts_size_in_bytes <= 0) 583 if (!record)
198 return -EINVAL; 584 return -EINVAL;
199 585
200 bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); 586 context = ds_get_context(task);
201 587 error = ds_validate_access(context, qual);
202 if (!bts) 588 if (error < 0)
203 return -ENOMEM; 589 goto out;
204 590
205 ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); 591 base = ds_get(context->ds, qual, ds_buffer_base);
592 idx = base + (index * ds_cfg.sizeof_rec[qual]);
206 593
207 if (!ds) { 594 error = -EINVAL;
208 kfree((void *)bts); 595 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
209 return -ENOMEM; 596 goto out;
210 }
211
212 set_bts_buffer_base(ds, bts);
213 set_bts_index(ds, bts);
214 set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
215 set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
216 597
217 *dsp = ds; 598 *record = (const void *)idx;
218 return 0; 599 error = ds_cfg.sizeof_rec[qual];
600 out:
601 ds_put_context(context);
602 return error;
219} 603}
220 604
221int ds_free(void **dsp) 605int ds_access_bts(struct task_struct *task, size_t index, const void **record)
222{ 606{
223 if (*dsp) { 607 return ds_access(task, index, record, ds_bts);
224 kfree((void *)get_bts_buffer_base(*dsp));
225 kfree(*dsp);
226 *dsp = NULL;
227 }
228 return 0;
229} 608}
230 609
231int ds_get_bts_size(void *ds) 610int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
232{ 611{
233 int size_in_bytes; 612 return ds_access(task, index, record, ds_pebs);
234
235 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
236 return -EOPNOTSUPP;
237
238 if (!ds)
239 return 0;
240
241 size_in_bytes =
242 get_bts_absolute_maximum(ds) -
243 get_bts_buffer_base(ds);
244 return size_in_bytes;
245} 613}
246 614
247int ds_get_bts_end(void *ds) 615static int ds_write(struct task_struct *task, const void *record, size_t size,
616 enum ds_qualifier qual, int force)
248{ 617{
249 int size_in_bytes = ds_get_bts_size(ds); 618 struct ds_context *context;
250 619 int error;
251 if (size_in_bytes <= 0)
252 return size_in_bytes;
253 620
254 return size_in_bytes / ds_cfg.sizeof_bts; 621 if (!record)
255} 622 return -EINVAL;
256 623
257int ds_get_bts_index(void *ds) 624 error = -EPERM;
258{ 625 context = ds_get_context(task);
259 int index_offset_in_bytes; 626 if (!context)
627 goto out;
260 628
261 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 629 if (!force) {
262 return -EOPNOTSUPP; 630 error = ds_validate_access(context, qual);
631 if (error < 0)
632 goto out;
633 }
263 634
264 index_offset_in_bytes = 635 error = 0;
265 get_bts_index(ds) - 636 while (size) {
266 get_bts_buffer_base(ds); 637 unsigned long base, index, end, write_end, int_th;
638 unsigned long write_size, adj_write_size;
639
640 /*
641 * write as much as possible without producing an
642 * overflow interrupt.
643 *
644 * interrupt_threshold must either be
645 * - bigger than absolute_maximum or
646 * - point to a record between buffer_base and absolute_maximum
647 *
648 * index points to a valid record.
649 */
650 base = ds_get(context->ds, qual, ds_buffer_base);
651 index = ds_get(context->ds, qual, ds_index);
652 end = ds_get(context->ds, qual, ds_absolute_maximum);
653 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
654
655 write_end = min(end, int_th);
656
657 /* if we are already beyond the interrupt threshold,
658 * we fill the entire buffer */
659 if (write_end <= index)
660 write_end = end;
661
662 if (write_end <= index)
663 goto out;
664
665 write_size = min((unsigned long) size, write_end - index);
666 memcpy((void *)index, record, write_size);
667
668 record = (const char *)record + write_size;
669 size -= write_size;
670 error += write_size;
671
672 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
673 adj_write_size *= ds_cfg.sizeof_rec[qual];
674
675 /* zero out trailing bytes */
676 memset((char *)index + write_size, 0,
677 adj_write_size - write_size);
678 index += adj_write_size;
679
680 if (index >= end)
681 index = base;
682 ds_set(context->ds, qual, ds_index, index);
683
684 if (index >= int_th)
685 ds_overflow(task, context, qual);
686 }
267 687
268 return index_offset_in_bytes / ds_cfg.sizeof_bts; 688 out:
689 ds_put_context(context);
690 return error;
269} 691}
270 692
271int ds_set_overflow(void *ds, int method) 693int ds_write_bts(struct task_struct *task, const void *record, size_t size)
272{ 694{
273 switch (method) { 695 return ds_write(task, record, size, ds_bts, /* force = */ 0);
274 case DS_O_SIGNAL:
275 return -EOPNOTSUPP;
276 case DS_O_WRAP:
277 return 0;
278 default:
279 return -EINVAL;
280 }
281} 696}
282 697
283int ds_get_overflow(void *ds) 698int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
284{ 699{
285 return DS_O_WRAP; 700 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
286} 701}
287 702
288int ds_clear(void *ds) 703int ds_unchecked_write_bts(struct task_struct *task,
704 const void *record, size_t size)
289{ 705{
290 int bts_size = ds_get_bts_size(ds); 706 return ds_write(task, record, size, ds_bts, /* force = */ 1);
291 unsigned long bts_base;
292
293 if (bts_size <= 0)
294 return bts_size;
295
296 bts_base = get_bts_buffer_base(ds);
297 memset((void *)bts_base, 0, bts_size);
298
299 set_bts_index(ds, bts_base);
300 return 0;
301} 707}
302 708
303int ds_read_bts(void *ds, int index, struct bts_struct *out) 709int ds_unchecked_write_pebs(struct task_struct *task,
710 const void *record, size_t size)
304{ 711{
305 void *bts; 712 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
713}
306 714
307 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 715static int ds_reset_or_clear(struct task_struct *task,
308 return -EOPNOTSUPP; 716 enum ds_qualifier qual, int clear)
717{
718 struct ds_context *context;
719 unsigned long base, end;
720 int error;
309 721
310 if (index < 0) 722 context = ds_get_context(task);
311 return -EINVAL; 723 error = ds_validate_access(context, qual);
724 if (error < 0)
725 goto out;
312 726
313 if (index >= ds_get_bts_size(ds)) 727 base = ds_get(context->ds, qual, ds_buffer_base);
314 return -EINVAL; 728 end = ds_get(context->ds, qual, ds_absolute_maximum);
315 729
316 bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); 730 if (clear)
731 memset((void *)base, 0, end - base);
317 732
318 memset(out, 0, sizeof(*out)); 733 ds_set(context->ds, qual, ds_index, base);
319 if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
320 out->qualifier = get_info_type(bts);
321 out->variant.jiffies = get_info_data(bts);
322 } else {
323 out->qualifier = BTS_BRANCH;
324 out->variant.lbr.from_ip = get_from_ip(bts);
325 out->variant.lbr.to_ip = get_to_ip(bts);
326 }
327 734
328 return sizeof(*out);; 735 error = 0;
736 out:
737 ds_put_context(context);
738 return error;
329} 739}
330 740
331int ds_write_bts(void *ds, const struct bts_struct *in) 741int ds_reset_bts(struct task_struct *task)
332{ 742{
333 unsigned long bts; 743 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
334 744}
335 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
336 return -EOPNOTSUPP;
337
338 if (ds_get_bts_size(ds) <= 0)
339 return -ENXIO;
340 745
341 bts = get_bts_index(ds); 746int ds_reset_pebs(struct task_struct *task)
747{
748 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
749}
342 750
343 memset((void *)bts, 0, ds_cfg.sizeof_bts); 751int ds_clear_bts(struct task_struct *task)
344 switch (in->qualifier) { 752{
345 case BTS_INVALID: 753 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
346 break; 754}
347 755
348 case BTS_BRANCH: 756int ds_clear_pebs(struct task_struct *task)
349 set_from_ip((void *)bts, in->variant.lbr.from_ip); 757{
350 set_to_ip((void *)bts, in->variant.lbr.to_ip); 758 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
351 break; 759}
352 760
353 case BTS_TASK_ARRIVES: 761int ds_get_pebs_reset(struct task_struct *task, u64 *value)
354 case BTS_TASK_DEPARTS: 762{
355 set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); 763 struct ds_context *context;
356 set_info_type((void *)bts, in->qualifier); 764 int error;
357 set_info_data((void *)bts, in->variant.jiffies);
358 break;
359 765
360 default: 766 if (!value)
361 return -EINVAL; 767 return -EINVAL;
362 }
363 768
364 bts = bts + ds_cfg.sizeof_bts; 769 context = ds_get_context(task);
365 if (bts >= get_bts_absolute_maximum(ds)) 770 error = ds_validate_access(context, ds_pebs);
366 bts = get_bts_buffer_base(ds); 771 if (error < 0)
367 set_bts_index(ds, bts); 772 goto out;
368 773
369 return ds_cfg.sizeof_bts; 774 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
775
776 error = 0;
777 out:
778 ds_put_context(context);
779 return error;
370} 780}
371 781
372unsigned long ds_debugctl_mask(void) 782int ds_set_pebs_reset(struct task_struct *task, u64 value)
373{ 783{
374 return ds_cfg.debugctl_mask; 784 struct ds_context *context;
375} 785 int error;
376 786
377#ifdef __i386__ 787 context = ds_get_context(task);
378static const struct ds_configuration ds_cfg_netburst = { 788 error = ds_validate_access(context, ds_pebs);
379 .sizeof_ds = 9 * 4, 789 if (error < 0)
380 .bts_buffer_base = { 0, 4 }, 790 goto out;
381 .bts_index = { 4, 4 },
382 .bts_absolute_maximum = { 8, 4 },
383 .bts_interrupt_threshold = { 12, 4 },
384 .sizeof_bts = 3 * 4,
385 .from_ip = { 0, 4 },
386 .to_ip = { 4, 4 },
387 .info_type = { 4, 1 },
388 .info_data = { 8, 4 },
389 .debugctl_mask = (1<<2)|(1<<3)
390};
391 791
392static const struct ds_configuration ds_cfg_pentium_m = { 792 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
393 .sizeof_ds = 9 * 4, 793
394 .bts_buffer_base = { 0, 4 }, 794 error = 0;
395 .bts_index = { 4, 4 }, 795 out:
396 .bts_absolute_maximum = { 8, 4 }, 796 ds_put_context(context);
397 .bts_interrupt_threshold = { 12, 4 }, 797 return error;
398 .sizeof_bts = 3 * 4, 798}
399 .from_ip = { 0, 4 }, 799
400 .to_ip = { 4, 4 }, 800static const struct ds_configuration ds_cfg_var = {
401 .info_type = { 4, 1 }, 801 .sizeof_ds = sizeof(long) * 12,
402 .info_data = { 8, 4 }, 802 .sizeof_field = sizeof(long),
403 .debugctl_mask = (1<<6)|(1<<7) 803 .sizeof_rec[ds_bts] = sizeof(long) * 3,
804 .sizeof_rec[ds_pebs] = sizeof(long) * 10
404}; 805};
405#endif /* _i386_ */ 806static const struct ds_configuration ds_cfg_64 = {
406 807 .sizeof_ds = 8 * 12,
407static const struct ds_configuration ds_cfg_core2 = { 808 .sizeof_field = 8,
408 .sizeof_ds = 9 * 8, 809 .sizeof_rec[ds_bts] = 8 * 3,
409 .bts_buffer_base = { 0, 8 }, 810 .sizeof_rec[ds_pebs] = 8 * 10
410 .bts_index = { 8, 8 },
411 .bts_absolute_maximum = { 16, 8 },
412 .bts_interrupt_threshold = { 24, 8 },
413 .sizeof_bts = 3 * 8,
414 .from_ip = { 0, 8 },
415 .to_ip = { 8, 8 },
416 .info_type = { 8, 1 },
417 .info_data = { 16, 8 },
418 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
419}; 811};
420 812
421static inline void 813static inline void
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
429 switch (c->x86) { 821 switch (c->x86) {
430 case 0x6: 822 case 0x6:
431 switch (c->x86_model) { 823 switch (c->x86_model) {
432#ifdef __i386__
433 case 0xD: 824 case 0xD:
434 case 0xE: /* Pentium M */ 825 case 0xE: /* Pentium M */
435 ds_configure(&ds_cfg_pentium_m); 826 ds_configure(&ds_cfg_var);
436 break; 827 break;
437#endif /* _i386_ */
438 case 0xF: /* Core2 */ 828 case 0xF: /* Core2 */
439 ds_configure(&ds_cfg_core2); 829 case 0x1C: /* Atom */
830 ds_configure(&ds_cfg_64);
440 break; 831 break;
441 default: 832 default:
442 /* sorry, don't know about them */ 833 /* sorry, don't know about them */
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
445 break; 836 break;
446 case 0xF: 837 case 0xF:
447 switch (c->x86_model) { 838 switch (c->x86_model) {
448#ifdef __i386__
449 case 0x0: 839 case 0x0:
450 case 0x1: 840 case 0x1:
451 case 0x2: /* Netburst */ 841 case 0x2: /* Netburst */
452 ds_configure(&ds_cfg_netburst); 842 ds_configure(&ds_cfg_var);
453 break; 843 break;
454#endif /* _i386_ */
455 default: 844 default:
456 /* sorry, don't know about them */ 845 /* sorry, don't know about them */
457 break; 846 break;
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
462 break; 851 break;
463 } 852 }
464} 853}
854
855void ds_free(struct ds_context *context)
856{
857 /* This is called when the task owning the parameter context
858 * is dying. There should not be any user of that context left
859 * to disturb us, anymore. */
860 unsigned long leftovers = context->count;
861 while (leftovers--)
862 ds_put_context(context);
863}
864#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 06cc8d4254b1..945a31cdd81f 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -414,9 +414,11 @@ void __init efi_init(void)
414 if (memmap.map == NULL) 414 if (memmap.map == NULL)
415 printk(KERN_ERR "Could not map the EFI memory map!\n"); 415 printk(KERN_ERR "Could not map the EFI memory map!\n");
416 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); 416 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
417
417 if (memmap.desc_size != sizeof(efi_memory_desc_t)) 418 if (memmap.desc_size != sizeof(efi_memory_desc_t))
418 printk(KERN_WARNING "Kernel-defined memdesc" 419 printk(KERN_WARNING
419 "doesn't match the one from EFI!\n"); 420 "Kernel-defined memdesc doesn't match the one from EFI!\n");
421
420 if (add_efi_memmap) 422 if (add_efi_memmap)
421 do_add_efi_memmap(); 423 do_add_efi_memmap();
422 424
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 89434d439605..cf3a0b2d0059 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64)
275ENTRY(ret_from_fork) 275ENTRY(ret_from_fork)
276 CFI_DEFAULT_STACK 276 CFI_DEFAULT_STACK
277 push kernel_eflags(%rip) 277 push kernel_eflags(%rip)
278 CFI_ADJUST_CFA_OFFSET 4 278 CFI_ADJUST_CFA_OFFSET 8
279 popf # reset kernel eflags 279 popf # reset kernel eflags
280 CFI_ADJUST_CFA_OFFSET -4 280 CFI_ADJUST_CFA_OFFSET -8
281 call schedule_tail 281 call schedule_tail
282 GET_THREAD_INFO(%rcx) 282 GET_THREAD_INFO(%rcx)
283 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 283 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 9bfc4d72fb2e..d16084f90649 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -108,12 +108,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
108 } 108 }
109 load_idt((const struct desc_ptr *)&idt_descr); 109 load_idt((const struct desc_ptr *)&idt_descr);
110 110
111 early_printk("Kernel alive\n"); 111 if (console_loglevel == 10)
112 early_printk("Kernel alive\n");
112 113
113 x86_64_init_pda(); 114 x86_64_init_pda();
114 115
115 early_printk("Kernel really alive\n");
116
117 x86_64_start_reservations(real_mode_data); 116 x86_64_start_reservations(real_mode_data);
118} 117}
119 118
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index a7010c3a377a..e835b4eea70b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4
172 * 172 *
173 * Note that the stack is not yet set up! 173 * Note that the stack is not yet set up!
174 */ 174 */
175#define PTE_ATTR 0x007 /* PRESENT+RW+USER */
176#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
177#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */
178
179default_entry: 175default_entry:
180#ifdef CONFIG_X86_PAE 176#ifdef CONFIG_X86_PAE
181 177
@@ -196,9 +192,9 @@ default_entry:
196 movl $pa(pg0), %edi 192 movl $pa(pg0), %edi
197 movl %edi, pa(init_pg_tables_start) 193 movl %edi, pa(init_pg_tables_start)
198 movl $pa(swapper_pg_pmd), %edx 194 movl $pa(swapper_pg_pmd), %edx
199 movl $PTE_ATTR, %eax 195 movl $PTE_IDENT_ATTR, %eax
20010: 19610:
201 leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ 197 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
202 movl %ecx,(%edx) /* Store PMD entry */ 198 movl %ecx,(%edx) /* Store PMD entry */
203 /* Upper half already zero */ 199 /* Upper half already zero */
204 addl $8,%edx 200 addl $8,%edx
@@ -215,7 +211,7 @@ default_entry:
215 * End condition: we must map up to and including INIT_MAP_BEYOND_END 211 * End condition: we must map up to and including INIT_MAP_BEYOND_END
216 * bytes beyond the end of our own page tables. 212 * bytes beyond the end of our own page tables.
217 */ 213 */
218 leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp 214 leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
219 cmpl %ebp,%eax 215 cmpl %ebp,%eax
220 jb 10b 216 jb 10b
2211: 2171:
@@ -224,7 +220,7 @@ default_entry:
224 movl %eax, pa(max_pfn_mapped) 220 movl %eax, pa(max_pfn_mapped)
225 221
226 /* Do early initialization of the fixmap area */ 222 /* Do early initialization of the fixmap area */
227 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax 223 movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
228 movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) 224 movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
229#else /* Not PAE */ 225#else /* Not PAE */
230 226
@@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
233 movl $pa(pg0), %edi 229 movl $pa(pg0), %edi
234 movl %edi, pa(init_pg_tables_start) 230 movl %edi, pa(init_pg_tables_start)
235 movl $pa(swapper_pg_dir), %edx 231 movl $pa(swapper_pg_dir), %edx
236 movl $PTE_ATTR, %eax 232 movl $PTE_IDENT_ATTR, %eax
23710: 23310:
238 leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ 234 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
239 movl %ecx,(%edx) /* Store identity PDE entry */ 235 movl %ecx,(%edx) /* Store identity PDE entry */
240 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ 236 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
241 addl $4,%edx 237 addl $4,%edx
@@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
249 * bytes beyond the end of our own page tables; the +0x007 is 245 * bytes beyond the end of our own page tables; the +0x007 is
250 * the attribute bits 246 * the attribute bits
251 */ 247 */
252 leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp 248 leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
253 cmpl %ebp,%eax 249 cmpl %ebp,%eax
254 jb 10b 250 jb 10b
255 movl %edi,pa(init_pg_tables_end) 251 movl %edi,pa(init_pg_tables_end)
@@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
257 movl %eax, pa(max_pfn_mapped) 253 movl %eax, pa(max_pfn_mapped)
258 254
259 /* Do early initialization of the fixmap area */ 255 /* Do early initialization of the fixmap area */
260 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax 256 movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
261 movl %eax,pa(swapper_pg_dir+0xffc) 257 movl %eax,pa(swapper_pg_dir+0xffc)
262#endif 258#endif
263 jmp 3f 259 jmp 3f
@@ -634,19 +630,19 @@ ENTRY(empty_zero_page)
634 /* Page-aligned for the benefit of paravirt? */ 630 /* Page-aligned for the benefit of paravirt? */
635 .align PAGE_SIZE_asm 631 .align PAGE_SIZE_asm
636ENTRY(swapper_pg_dir) 632ENTRY(swapper_pg_dir)
637 .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ 633 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
638# if KPMDS == 3 634# if KPMDS == 3
639 .long pa(swapper_pg_pmd+PGD_ATTR),0 635 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
640 .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 636 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0
641 .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 637 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0
642# elif KPMDS == 2 638# elif KPMDS == 2
643 .long 0,0 639 .long 0,0
644 .long pa(swapper_pg_pmd+PGD_ATTR),0 640 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
645 .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 641 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0
646# elif KPMDS == 1 642# elif KPMDS == 1
647 .long 0,0 643 .long 0,0
648 .long 0,0 644 .long 0,0
649 .long pa(swapper_pg_pmd+PGD_ATTR),0 645 .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
650# else 646# else
651# error "Kernel PMDs should be 1, 2 or 3" 647# error "Kernel PMDs should be 1, 2 or 3"
652# endif 648# endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index db3280afe886..26cfdc1d7c7f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -110,7 +110,7 @@ startup_64:
110 movq %rdi, %rax 110 movq %rdi, %rax
111 shrq $PMD_SHIFT, %rax 111 shrq $PMD_SHIFT, %rax
112 andq $(PTRS_PER_PMD - 1), %rax 112 andq $(PTRS_PER_PMD - 1), %rax
113 leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx 113 leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
114 leaq level2_spare_pgt(%rip), %rbx 114 leaq level2_spare_pgt(%rip), %rbx
115 movq %rdx, 0(%rbx, %rax, 8) 115 movq %rdx, 0(%rbx, %rax, 8)
116ident_complete: 116ident_complete:
@@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt)
374 /* Since I easily can, map the first 1G. 374 /* Since I easily can, map the first 1G.
375 * Don't set NX because code runs from these pages. 375 * Don't set NX because code runs from these pages.
376 */ 376 */
377 PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) 377 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
378 378
379NEXT_PAGE(level2_kernel_pgt) 379NEXT_PAGE(level2_kernel_pgt)
380 /* 380 /*
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a31c85..191914302744 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/thread_info.h> 15#include <linux/thread_info.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <asm/syscalls.h>
17 18
18/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ 19/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
19static void set_bitmap(unsigned long *bitmap, unsigned int base, 20static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b669d3..f1c688e46f35 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -20,6 +20,8 @@
20 20
21#ifdef CONFIG_X86_32 21#ifdef CONFIG_X86_32
22#include <mach_apic.h> 22#include <mach_apic.h>
23#include <mach_ipi.h>
24
23/* 25/*
24 * the following functions deal with sending IPIs between CPUs. 26 * the following functions deal with sending IPIs between CPUs.
25 * 27 *
@@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
147} 149}
148 150
149/* must come after the send_IPI functions above for inlining */ 151/* must come after the send_IPI functions above for inlining */
150#include <mach_ipi.h>
151static int convert_apicid_to_cpu(int apic_id) 152static int convert_apicid_to_cpu(int apic_id)
152{ 153{
153 int i; 154 int i;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1cf8c1fcc088..b71e02d42f4f 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -325,7 +325,7 @@ skip:
325 for_each_online_cpu(j) 325 for_each_online_cpu(j)
326 seq_printf(p, "%10u ", 326 seq_printf(p, "%10u ",
327 per_cpu(irq_stat,j).irq_call_count); 327 per_cpu(irq_stat,j).irq_call_count);
328 seq_printf(p, " function call interrupts\n"); 328 seq_printf(p, " Function call interrupts\n");
329 seq_printf(p, "TLB: "); 329 seq_printf(p, "TLB: ");
330 for_each_online_cpu(j) 330 for_each_online_cpu(j)
331 seq_printf(p, "%10u ", 331 seq_printf(p, "%10u ",
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1f78b238d8d2..f065fe9071b9 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -129,7 +129,7 @@ skip:
129 seq_printf(p, "CAL: "); 129 seq_printf(p, "CAL: ");
130 for_each_online_cpu(j) 130 for_each_online_cpu(j)
131 seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); 131 seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
132 seq_printf(p, " function call interrupts\n"); 132 seq_printf(p, " Function call interrupts\n");
133 seq_printf(p, "TLB: "); 133 seq_printf(p, "TLB: ");
134 for_each_online_cpu(j) 134 for_each_online_cpu(j)
135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); 135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index f2d43bc75514..ff7d3b0124f1 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
139 if (PageHighMem(pg)) { 139 if (PageHighMem(pg)) {
140 data = ioremap_cache(pa_data, sizeof(*data)); 140 data = ioremap_cache(pa_data, sizeof(*data));
141 if (!data) { 141 if (!data) {
142 kfree(node);
142 error = -ENXIO; 143 error = -ENXIO;
143 goto err_dir; 144 goto err_dir;
144 } 145 }
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index f47f0eb886b8..10435a120d22 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -69,6 +69,9 @@ static int gdb_x86vector = -1;
69 */ 69 */
70void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) 70void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
71{ 71{
72#ifndef CONFIG_X86_32
73 u32 *gdb_regs32 = (u32 *)gdb_regs;
74#endif
72 gdb_regs[GDB_AX] = regs->ax; 75 gdb_regs[GDB_AX] = regs->ax;
73 gdb_regs[GDB_BX] = regs->bx; 76 gdb_regs[GDB_BX] = regs->bx;
74 gdb_regs[GDB_CX] = regs->cx; 77 gdb_regs[GDB_CX] = regs->cx;
@@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
76 gdb_regs[GDB_SI] = regs->si; 79 gdb_regs[GDB_SI] = regs->si;
77 gdb_regs[GDB_DI] = regs->di; 80 gdb_regs[GDB_DI] = regs->di;
78 gdb_regs[GDB_BP] = regs->bp; 81 gdb_regs[GDB_BP] = regs->bp;
79 gdb_regs[GDB_PS] = regs->flags;
80 gdb_regs[GDB_PC] = regs->ip; 82 gdb_regs[GDB_PC] = regs->ip;
81#ifdef CONFIG_X86_32 83#ifdef CONFIG_X86_32
84 gdb_regs[GDB_PS] = regs->flags;
82 gdb_regs[GDB_DS] = regs->ds; 85 gdb_regs[GDB_DS] = regs->ds;
83 gdb_regs[GDB_ES] = regs->es; 86 gdb_regs[GDB_ES] = regs->es;
84 gdb_regs[GDB_CS] = regs->cs; 87 gdb_regs[GDB_CS] = regs->cs;
@@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
94 gdb_regs[GDB_R13] = regs->r13; 97 gdb_regs[GDB_R13] = regs->r13;
95 gdb_regs[GDB_R14] = regs->r14; 98 gdb_regs[GDB_R14] = regs->r14;
96 gdb_regs[GDB_R15] = regs->r15; 99 gdb_regs[GDB_R15] = regs->r15;
100 gdb_regs32[GDB_PS] = regs->flags;
101 gdb_regs32[GDB_CS] = regs->cs;
102 gdb_regs32[GDB_SS] = regs->ss;
97#endif 103#endif
98 gdb_regs[GDB_SP] = regs->sp; 104 gdb_regs[GDB_SP] = regs->sp;
99} 105}
@@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
112 */ 118 */
113void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) 119void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
114{ 120{
121#ifndef CONFIG_X86_32
122 u32 *gdb_regs32 = (u32 *)gdb_regs;
123#endif
115 gdb_regs[GDB_AX] = 0; 124 gdb_regs[GDB_AX] = 0;
116 gdb_regs[GDB_BX] = 0; 125 gdb_regs[GDB_BX] = 0;
117 gdb_regs[GDB_CX] = 0; 126 gdb_regs[GDB_CX] = 0;
@@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
129 gdb_regs[GDB_FS] = 0xFFFF; 138 gdb_regs[GDB_FS] = 0xFFFF;
130 gdb_regs[GDB_GS] = 0xFFFF; 139 gdb_regs[GDB_GS] = 0xFFFF;
131#else 140#else
132 gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); 141 gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
133 gdb_regs[GDB_PC] = 0; 142 gdb_regs32[GDB_CS] = __KERNEL_CS;
143 gdb_regs32[GDB_SS] = __KERNEL_DS;
144 gdb_regs[GDB_PC] = p->thread.ip;
134 gdb_regs[GDB_R8] = 0; 145 gdb_regs[GDB_R8] = 0;
135 gdb_regs[GDB_R9] = 0; 146 gdb_regs[GDB_R9] = 0;
136 gdb_regs[GDB_R10] = 0; 147 gdb_regs[GDB_R10] = 0;
@@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
153 */ 164 */
154void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) 165void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
155{ 166{
167#ifndef CONFIG_X86_32
168 u32 *gdb_regs32 = (u32 *)gdb_regs;
169#endif
156 regs->ax = gdb_regs[GDB_AX]; 170 regs->ax = gdb_regs[GDB_AX];
157 regs->bx = gdb_regs[GDB_BX]; 171 regs->bx = gdb_regs[GDB_BX];
158 regs->cx = gdb_regs[GDB_CX]; 172 regs->cx = gdb_regs[GDB_CX];
@@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
160 regs->si = gdb_regs[GDB_SI]; 174 regs->si = gdb_regs[GDB_SI];
161 regs->di = gdb_regs[GDB_DI]; 175 regs->di = gdb_regs[GDB_DI];
162 regs->bp = gdb_regs[GDB_BP]; 176 regs->bp = gdb_regs[GDB_BP];
163 regs->flags = gdb_regs[GDB_PS];
164 regs->ip = gdb_regs[GDB_PC]; 177 regs->ip = gdb_regs[GDB_PC];
165#ifdef CONFIG_X86_32 178#ifdef CONFIG_X86_32
179 regs->flags = gdb_regs[GDB_PS];
166 regs->ds = gdb_regs[GDB_DS]; 180 regs->ds = gdb_regs[GDB_DS];
167 regs->es = gdb_regs[GDB_ES]; 181 regs->es = gdb_regs[GDB_ES];
168 regs->cs = gdb_regs[GDB_CS]; 182 regs->cs = gdb_regs[GDB_CS];
@@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
175 regs->r13 = gdb_regs[GDB_R13]; 189 regs->r13 = gdb_regs[GDB_R13];
176 regs->r14 = gdb_regs[GDB_R14]; 190 regs->r14 = gdb_regs[GDB_R14];
177 regs->r15 = gdb_regs[GDB_R15]; 191 regs->r15 = gdb_regs[GDB_R15];
192 regs->flags = gdb_regs32[GDB_PS];
193 regs->cs = gdb_regs32[GDB_CS];
194 regs->ss = gdb_regs32[GDB_SS];
178#endif 195#endif
179} 196}
180 197
@@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
378 if (remcomInBuffer[0] == 's') { 395 if (remcomInBuffer[0] == 's') {
379 linux_regs->flags |= X86_EFLAGS_TF; 396 linux_regs->flags |= X86_EFLAGS_TF;
380 kgdb_single_step = 1; 397 kgdb_single_step = 1;
381 if (kgdb_contthread) { 398 atomic_set(&kgdb_cpu_doing_single_step,
382 atomic_set(&kgdb_cpu_doing_single_step, 399 raw_smp_processor_id());
383 raw_smp_processor_id());
384 }
385 } 400 }
386 401
387 get_debugreg(dr6, 6); 402 get_debugreg(dr6, 6);
@@ -440,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
440 return NOTIFY_DONE; 455 return NOTIFY_DONE;
441 456
442 case DIE_NMI_IPI: 457 case DIE_NMI_IPI:
443 if (atomic_read(&kgdb_active) != -1) { 458 /* Just ignore, we will handle the roundup on DIE_NMI. */
444 /* KGDB CPU roundup */
445 kgdb_nmicallback(raw_smp_processor_id(), regs);
446 was_in_debug_nmi[raw_smp_processor_id()] = 1;
447 touch_nmi_watchdog();
448 }
449 return NOTIFY_DONE; 459 return NOTIFY_DONE;
450 460
451 case DIE_NMIUNKNOWN: 461 case DIE_NMIUNKNOWN:
@@ -466,9 +476,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
466 476
467 case DIE_DEBUG: 477 case DIE_DEBUG:
468 if (atomic_read(&kgdb_cpu_doing_single_step) == 478 if (atomic_read(&kgdb_cpu_doing_single_step) ==
469 raw_smp_processor_id() && 479 raw_smp_processor_id()) {
470 user_mode(regs)) 480 if (user_mode(regs))
471 return single_step_cont(regs, args); 481 return single_step_cont(regs, args);
482 break;
483 } else if (test_thread_flag(TIF_SINGLESTEP))
484 /* This means a user thread is single stepping
485 * a system call which should be ignored
486 */
487 return NOTIFY_DONE;
472 /* fall through */ 488 /* fall through */
473 default: 489 default:
474 if (user_mode(regs)) 490 if (user_mode(regs))
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8b7a3cf37d2b..478bca986eca 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -178,7 +178,7 @@ static void kvm_flush_tlb(void)
178 kvm_deferred_mmu_op(&ftlb, sizeof ftlb); 178 kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
179} 179}
180 180
181static void kvm_release_pt(u32 pfn) 181static void kvm_release_pt(unsigned long pfn)
182{ 182{
183 struct kvm_mmu_op_release_pt rpt = { 183 struct kvm_mmu_op_release_pt rpt = {
184 .header.op = KVM_MMU_OP_RELEASE_PT, 184 .header.op = KVM_MMU_OP_RELEASE_PT,
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f06f4f..0ed5f939b905 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -18,6 +18,7 @@
18#include <asm/ldt.h> 18#include <asm/ldt.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21#include <asm/syscalls.h>
21 22
22#ifdef CONFIG_SMP 23#ifdef CONFIG_SMP
23static void flush_ldt(void *current_mm) 24static void flush_ldt(void *current_mm)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index abb78a2cc4ad..2c97f07f1c2c 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -299,6 +299,15 @@ void acpi_nmi_disable(void)
299 on_each_cpu(__acpi_nmi_disable, NULL, 1); 299 on_each_cpu(__acpi_nmi_disable, NULL, 1);
300} 300}
301 301
302/*
303 * This function is called as soon the LAPIC NMI watchdog driver has everything
304 * in place and it's ready to check if the NMIs belong to the NMI watchdog
305 */
306void cpu_nmi_set_wd_enabled(void)
307{
308 __get_cpu_var(wd_enabled) = 1;
309}
310
302void setup_apic_nmi_watchdog(void *unused) 311void setup_apic_nmi_watchdog(void *unused)
303{ 312{
304 if (__get_cpu_var(wd_enabled)) 313 if (__get_cpu_var(wd_enabled))
@@ -311,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused)
311 320
312 switch (nmi_watchdog) { 321 switch (nmi_watchdog) {
313 case NMI_LOCAL_APIC: 322 case NMI_LOCAL_APIC:
314 /* enable it before to avoid race with handler */
315 __get_cpu_var(wd_enabled) = 1;
316 if (lapic_watchdog_init(nmi_hz) < 0) { 323 if (lapic_watchdog_init(nmi_hz) < 0) {
317 __get_cpu_var(wd_enabled) = 0; 324 __get_cpu_var(wd_enabled) = 0;
318 return; 325 return;
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 3e6672274807..7a13fac63a1f 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd);
190static void __init platform_detect(void) 190static void __init platform_detect(void)
191{ 191{
192 size_t propsize; 192 size_t propsize;
193 u32 rev; 193 __be32 rev;
194 194
195 if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, 195 if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4,
196 &propsize) || propsize != 4) { 196 &propsize) || propsize != 4) {
197 printk(KERN_ERR "ofw: getprop call failed!\n"); 197 printk(KERN_ERR "ofw: getprop call failed!\n");
198 rev = 0; 198 rev = cpu_to_be32(0);
199 } 199 }
200 olpc_platform_info.boardrev = be32_to_cpu(rev); 200 olpc_platform_info.boardrev = be32_to_cpu(rev);
201} 201}
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
203static void __init platform_detect(void) 203static void __init platform_detect(void)
204{ 204{
205 /* stopgap until OFW support is added to the kernel */ 205 /* stopgap until OFW support is added to the kernel */
206 olpc_platform_info.boardrev = be32_to_cpu(0xc2); 206 olpc_platform_info.boardrev = 0xc2;
207} 207}
208#endif 208#endif
209 209
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17e61cb..e2f43768723a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = {
330#endif 330#endif
331 .wbinvd = native_wbinvd, 331 .wbinvd = native_wbinvd,
332 .read_msr = native_read_msr_safe, 332 .read_msr = native_read_msr_safe,
333 .read_msr_amd = native_read_msr_amd_safe,
333 .write_msr = native_write_msr_safe, 334 .write_msr = native_write_msr_safe,
334 .read_tsc = native_read_tsc, 335 .read_tsc = native_read_tsc,
335 .read_pmc = native_read_pmc, 336 .read_pmc = native_read_pmc,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 58262218781b..9fe644f4861d 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
23 start = start_##ops##_##x; \ 23 start = start_##ops##_##x; \
24 end = end_##ops##_##x; \ 24 end = end_##ops##_##x; \
25 goto patch_site 25 goto patch_site
26 switch(type) { 26 switch (type) {
27 PATCH_SITE(pv_irq_ops, irq_disable); 27 PATCH_SITE(pv_irq_ops, irq_disable);
28 PATCH_SITE(pv_irq_ops, irq_enable); 28 PATCH_SITE(pv_irq_ops, irq_enable);
29 PATCH_SITE(pv_irq_ops, restore_fl); 29 PATCH_SITE(pv_irq_ops, restore_fl);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 4e612d20170a..0a3824e837b4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -83,7 +83,7 @@ void __init dma32_reserve_bootmem(void)
83 * using 512M as goal 83 * using 512M as goal
84 */ 84 */
85 align = 64ULL<<20; 85 align = 64ULL<<20;
86 size = round_up(dma32_bootmem_size, align); 86 size = roundup(dma32_bootmem_size, align);
87 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 87 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
88 512ULL<<20); 88 512ULL<<20);
89 if (dma32_bootmem_ptr) 89 if (dma32_bootmem_ptr)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d077116fec1b..145f1c83369f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -650,7 +650,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
650 struct pci_dev *dev; 650 struct pci_dev *dev;
651 void *gatt; 651 void *gatt;
652 int i, error; 652 int i, error;
653 unsigned long start_pfn, end_pfn;
654 653
655 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 654 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
656 aper_size = aper_base = info->aper_size = 0; 655 aper_size = aper_base = info->aper_size = 0;
@@ -697,12 +696,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
697 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 696 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
698 aper_base, aper_size>>10); 697 aper_base, aper_size>>10);
699 698
700 /* need to map that range */
701 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
702 if (end_pfn > max_low_pfn_mapped) {
703 start_pfn = (aper_base>>PAGE_SHIFT);
704 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
705 }
706 return 0; 699 return 0;
707 700
708 nommu: 701 nommu:
@@ -745,7 +738,8 @@ void __init gart_iommu_init(void)
745{ 738{
746 struct agp_kern_info info; 739 struct agp_kern_info info;
747 unsigned long iommu_start; 740 unsigned long iommu_start;
748 unsigned long aper_size; 741 unsigned long aper_base, aper_size;
742 unsigned long start_pfn, end_pfn;
749 unsigned long scratch; 743 unsigned long scratch;
750 long i; 744 long i;
751 745
@@ -783,8 +777,16 @@ void __init gart_iommu_init(void)
783 return; 777 return;
784 } 778 }
785 779
780 /* need to map that range */
781 aper_size = info.aper_size << 20;
782 aper_base = info.aper_base;
783 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
784 if (end_pfn > max_low_pfn_mapped) {
785 start_pfn = (aper_base>>PAGE_SHIFT);
786 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
787 }
788
786 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 789 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
787 aper_size = info.aper_size * 1024 * 1024;
788 iommu_size = check_iommu_size(info.aper_base, aper_size); 790 iommu_size = check_iommu_size(info.aper_base, aper_size);
789 iommu_pages = iommu_size >> PAGE_SHIFT; 791 iommu_pages = iommu_size >> PAGE_SHIFT;
790 792
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index bc1f2d3ea277..a311ffcaad16 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c
@@ -1,20 +1,13 @@
1#include <linux/platform_device.h> 1#include <linux/platform_device.h>
2#include <linux/errno.h> 2#include <linux/err.h>
3#include <linux/init.h> 3#include <linux/init.h>
4 4
5static __init int add_pcspkr(void) 5static __init int add_pcspkr(void)
6{ 6{
7 struct platform_device *pd; 7 struct platform_device *pd;
8 int ret;
9 8
10 pd = platform_device_alloc("pcspkr", -1); 9 pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
11 if (!pd)
12 return -ENOMEM;
13 10
14 ret = platform_device_add(pd); 11 return IS_ERR(pd) ? PTR_ERR(pd) : 0;
15 if (ret)
16 platform_device_put(pd);
17
18 return ret;
19} 12}
20device_initcall(add_pcspkr); 13device_initcall(add_pcspkr);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc4d5b0a6a0..ec7a2ba9bce8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -185,7 +185,8 @@ static void mwait_idle(void)
185static void poll_idle(void) 185static void poll_idle(void)
186{ 186{
187 local_irq_enable(); 187 local_irq_enable();
188 cpu_relax(); 188 while (!need_resched())
189 cpu_relax();
189} 190}
190 191
191/* 192/*
@@ -246,6 +247,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
246 return 1; 247 return 1;
247} 248}
248 249
250static cpumask_t c1e_mask = CPU_MASK_NONE;
251static int c1e_detected;
252
253void c1e_remove_cpu(int cpu)
254{
255 cpu_clear(cpu, c1e_mask);
256}
257
249/* 258/*
250 * C1E aware idle routine. We check for C1E active in the interrupt 259 * C1E aware idle routine. We check for C1E active in the interrupt
251 * pending message MSR. If we detect C1E, then we handle it the same 260 * pending message MSR. If we detect C1E, then we handle it the same
@@ -253,9 +262,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
253 */ 262 */
254static void c1e_idle(void) 263static void c1e_idle(void)
255{ 264{
256 static cpumask_t c1e_mask = CPU_MASK_NONE;
257 static int c1e_detected;
258
259 if (need_resched()) 265 if (need_resched())
260 return; 266 return;
261 267
@@ -265,8 +271,10 @@ static void c1e_idle(void)
265 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); 271 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
266 if (lo & K8_INTP_C1E_ACTIVE_MASK) { 272 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
267 c1e_detected = 1; 273 c1e_detected = 1;
268 mark_tsc_unstable("TSC halt in C1E"); 274 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
269 printk(KERN_INFO "System has C1E enabled\n"); 275 mark_tsc_unstable("TSC halt in AMD C1E");
276 printk(KERN_INFO "System has AMD C1E enabled\n");
277 set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
270 } 278 }
271 } 279 }
272 280
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0bc..205188db9626 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -37,6 +37,7 @@
37#include <linux/tick.h> 37#include <linux/tick.h>
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/dmi.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
@@ -55,6 +56,9 @@
55#include <asm/tlbflush.h> 56#include <asm/tlbflush.h>
56#include <asm/cpu.h> 57#include <asm/cpu.h>
57#include <asm/kdebug.h> 58#include <asm/kdebug.h>
59#include <asm/idle.h>
60#include <asm/syscalls.h>
61#include <asm/smp.h>
58 62
59asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 63asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
60 64
@@ -88,6 +92,7 @@ static void cpu_exit_clear(void)
88 cpu_clear(cpu, cpu_callin_map); 92 cpu_clear(cpu, cpu_callin_map);
89 93
90 numa_remove_cpu(cpu); 94 numa_remove_cpu(cpu);
95 c1e_remove_cpu(cpu);
91} 96}
92 97
93/* We don't actually take CPU down, just spin without interrupts. */ 98/* We don't actually take CPU down, just spin without interrupts. */
@@ -159,6 +164,7 @@ void __show_registers(struct pt_regs *regs, int all)
159 unsigned long d0, d1, d2, d3, d6, d7; 164 unsigned long d0, d1, d2, d3, d6, d7;
160 unsigned long sp; 165 unsigned long sp;
161 unsigned short ss, gs; 166 unsigned short ss, gs;
167 const char *board;
162 168
163 if (user_mode_vm(regs)) { 169 if (user_mode_vm(regs)) {
164 sp = regs->sp; 170 sp = regs->sp;
@@ -171,11 +177,15 @@ void __show_registers(struct pt_regs *regs, int all)
171 } 177 }
172 178
173 printk("\n"); 179 printk("\n");
174 printk("Pid: %d, comm: %s %s (%s %.*s)\n", 180
181 board = dmi_get_system_info(DMI_PRODUCT_NAME);
182 if (!board)
183 board = "";
184 printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
175 task_pid_nr(current), current->comm, 185 task_pid_nr(current), current->comm,
176 print_tainted(), init_utsname()->release, 186 print_tainted(), init_utsname()->release,
177 (int)strcspn(init_utsname()->version, " "), 187 (int)strcspn(init_utsname()->version, " "),
178 init_utsname()->version); 188 init_utsname()->version, board);
179 189
180 printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", 190 printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
181 (u16)regs->cs, regs->ip, regs->flags, 191 (u16)regs->cs, regs->ip, regs->flags,
@@ -275,6 +285,14 @@ void exit_thread(void)
275 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 285 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
276 put_cpu(); 286 put_cpu();
277 } 287 }
288#ifdef CONFIG_X86_DS
289 /* Free any DS contexts that have not been properly released. */
290 if (unlikely(current->thread.ds_ctx)) {
291 /* we clear debugctl to make sure DS is not used. */
292 update_debugctlmsr(0);
293 ds_free(current->thread.ds_ctx);
294 }
295#endif /* CONFIG_X86_DS */
278} 296}
279 297
280void flush_thread(void) 298void flush_thread(void)
@@ -436,6 +454,35 @@ int set_tsc_mode(unsigned int val)
436 return 0; 454 return 0;
437} 455}
438 456
457#ifdef CONFIG_X86_DS
458static int update_debugctl(struct thread_struct *prev,
459 struct thread_struct *next, unsigned long debugctl)
460{
461 unsigned long ds_prev = 0;
462 unsigned long ds_next = 0;
463
464 if (prev->ds_ctx)
465 ds_prev = (unsigned long)prev->ds_ctx->ds;
466 if (next->ds_ctx)
467 ds_next = (unsigned long)next->ds_ctx->ds;
468
469 if (ds_next != ds_prev) {
470 /* we clear debugctl to make sure DS
471 * is not in use when we change it */
472 debugctl = 0;
473 update_debugctlmsr(0);
474 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
475 }
476 return debugctl;
477}
478#else
479static int update_debugctl(struct thread_struct *prev,
480 struct thread_struct *next, unsigned long debugctl)
481{
482 return debugctl;
483}
484#endif /* CONFIG_X86_DS */
485
439static noinline void 486static noinline void
440__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 487__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
441 struct tss_struct *tss) 488 struct tss_struct *tss)
@@ -446,14 +493,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
446 prev = &prev_p->thread; 493 prev = &prev_p->thread;
447 next = &next_p->thread; 494 next = &next_p->thread;
448 495
449 debugctl = prev->debugctlmsr; 496 debugctl = update_debugctl(prev, next, prev->debugctlmsr);
450 if (next->ds_area_msr != prev->ds_area_msr) {
451 /* we clear debugctl to make sure DS
452 * is not in use when we change it */
453 debugctl = 0;
454 update_debugctlmsr(0);
455 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
456 }
457 497
458 if (next->debugctlmsr != debugctl) 498 if (next->debugctlmsr != debugctl)
459 update_debugctlmsr(next->debugctlmsr); 499 update_debugctlmsr(next->debugctlmsr);
@@ -477,13 +517,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
477 hard_enable_TSC(); 517 hard_enable_TSC();
478 } 518 }
479 519
480#ifdef X86_BTS 520#ifdef CONFIG_X86_PTRACE_BTS
481 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 521 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
482 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 522 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
483 523
484 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 524 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
485 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 525 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
486#endif 526#endif /* CONFIG_X86_PTRACE_BTS */
487 527
488 528
489 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 529 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2a..2a8ccb9238b4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,11 +37,11 @@
37#include <linux/kdebug.h> 37#include <linux/kdebug.h>
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/uaccess.h>
41#include <linux/io.h>
40 42
41#include <asm/uaccess.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
43#include <asm/system.h> 44#include <asm/system.h>
44#include <asm/io.h>
45#include <asm/processor.h> 45#include <asm/processor.h>
46#include <asm/i387.h> 46#include <asm/i387.h>
47#include <asm/mmu_context.h> 47#include <asm/mmu_context.h>
@@ -51,6 +51,7 @@
51#include <asm/proto.h> 51#include <asm/proto.h>
52#include <asm/ia32.h> 52#include <asm/ia32.h>
53#include <asm/idle.h> 53#include <asm/idle.h>
54#include <asm/syscalls.h>
54 55
55asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
56 57
@@ -88,11 +89,13 @@ void exit_idle(void)
88#ifdef CONFIG_HOTPLUG_CPU 89#ifdef CONFIG_HOTPLUG_CPU
89DECLARE_PER_CPU(int, cpu_state); 90DECLARE_PER_CPU(int, cpu_state);
90 91
91#include <asm/nmi.h> 92#include <linux/nmi.h>
92/* We halt the CPU with physical CPU hotplug */ 93/* We halt the CPU with physical CPU hotplug */
93static inline void play_dead(void) 94static inline void play_dead(void)
94{ 95{
95 idle_task_exit(); 96 idle_task_exit();
97 c1e_remove_cpu(raw_smp_processor_id());
98
96 mb(); 99 mb();
97 /* Ack it */ 100 /* Ack it */
98 __get_cpu_var(cpu_state) = CPU_DEAD; 101 __get_cpu_var(cpu_state) = CPU_DEAD;
@@ -151,7 +154,7 @@ void cpu_idle(void)
151} 154}
152 155
153/* Prints also some state that isn't saved in the pt_regs */ 156/* Prints also some state that isn't saved in the pt_regs */
154void __show_regs(struct pt_regs * regs) 157void __show_regs(struct pt_regs *regs)
155{ 158{
156 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; 159 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
157 unsigned long d0, d1, d2, d3, d6, d7; 160 unsigned long d0, d1, d2, d3, d6, d7;
@@ -160,59 +163,61 @@ void __show_regs(struct pt_regs * regs)
160 163
161 printk("\n"); 164 printk("\n");
162 print_modules(); 165 print_modules();
163 printk("Pid: %d, comm: %.20s %s %s %.*s\n", 166 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
164 current->pid, current->comm, print_tainted(), 167 current->pid, current->comm, print_tainted(),
165 init_utsname()->release, 168 init_utsname()->release,
166 (int)strcspn(init_utsname()->version, " "), 169 (int)strcspn(init_utsname()->version, " "),
167 init_utsname()->version); 170 init_utsname()->version);
168 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); 171 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
169 printk_address(regs->ip, 1); 172 printk_address(regs->ip, 1);
170 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, 173 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
171 regs->flags); 174 regs->sp, regs->flags);
172 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 175 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
173 regs->ax, regs->bx, regs->cx); 176 regs->ax, regs->bx, regs->cx);
174 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", 177 printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
175 regs->dx, regs->si, regs->di); 178 regs->dx, regs->si, regs->di);
176 printk("RBP: %016lx R08: %016lx R09: %016lx\n", 179 printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
177 regs->bp, regs->r8, regs->r9); 180 regs->bp, regs->r8, regs->r9);
178 printk("R10: %016lx R11: %016lx R12: %016lx\n", 181 printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
179 regs->r10, regs->r11, regs->r12); 182 regs->r10, regs->r11, regs->r12);
180 printk("R13: %016lx R14: %016lx R15: %016lx\n", 183 printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
181 regs->r13, regs->r14, regs->r15); 184 regs->r13, regs->r14, regs->r15);
182 185
183 asm("movl %%ds,%0" : "=r" (ds)); 186 asm("movl %%ds,%0" : "=r" (ds));
184 asm("movl %%cs,%0" : "=r" (cs)); 187 asm("movl %%cs,%0" : "=r" (cs));
185 asm("movl %%es,%0" : "=r" (es)); 188 asm("movl %%es,%0" : "=r" (es));
186 asm("movl %%fs,%0" : "=r" (fsindex)); 189 asm("movl %%fs,%0" : "=r" (fsindex));
187 asm("movl %%gs,%0" : "=r" (gsindex)); 190 asm("movl %%gs,%0" : "=r" (gsindex));
188 191
189 rdmsrl(MSR_FS_BASE, fs); 192 rdmsrl(MSR_FS_BASE, fs);
190 rdmsrl(MSR_GS_BASE, gs); 193 rdmsrl(MSR_GS_BASE, gs);
191 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 194 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
192 195
193 cr0 = read_cr0(); 196 cr0 = read_cr0();
194 cr2 = read_cr2(); 197 cr2 = read_cr2();
195 cr3 = read_cr3(); 198 cr3 = read_cr3();
196 cr4 = read_cr4(); 199 cr4 = read_cr4();
197 200
198 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 201 printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
199 fs,fsindex,gs,gsindex,shadowgs); 202 fs, fsindex, gs, gsindex, shadowgs);
200 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 203 printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
201 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); 204 es, cr0);
205 printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
206 cr4);
202 207
203 get_debugreg(d0, 0); 208 get_debugreg(d0, 0);
204 get_debugreg(d1, 1); 209 get_debugreg(d1, 1);
205 get_debugreg(d2, 2); 210 get_debugreg(d2, 2);
206 printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); 211 printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
207 get_debugreg(d3, 3); 212 get_debugreg(d3, 3);
208 get_debugreg(d6, 6); 213 get_debugreg(d6, 6);
209 get_debugreg(d7, 7); 214 get_debugreg(d7, 7);
210 printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 215 printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
211} 216}
212 217
213void show_regs(struct pt_regs *regs) 218void show_regs(struct pt_regs *regs)
214{ 219{
215 printk("CPU %d:", smp_processor_id()); 220 printk(KERN_INFO "CPU %d:", smp_processor_id());
216 __show_regs(regs); 221 __show_regs(regs);
217 show_trace(NULL, regs, (void *)(regs + 1), regs->bp); 222 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
218} 223}
@@ -238,6 +243,14 @@ void exit_thread(void)
238 t->io_bitmap_max = 0; 243 t->io_bitmap_max = 0;
239 put_cpu(); 244 put_cpu();
240 } 245 }
246#ifdef CONFIG_X86_DS
247 /* Free any DS contexts that have not been properly released. */
248 if (unlikely(t->ds_ctx)) {
249 /* we clear debugctl to make sure DS is not used. */
250 update_debugctlmsr(0);
251 ds_free(t->ds_ctx);
252 }
253#endif /* CONFIG_X86_DS */
241} 254}
242 255
243void flush_thread(void) 256void flush_thread(void)
@@ -313,10 +326,10 @@ void prepare_to_copy(struct task_struct *tsk)
313 326
314int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, 327int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
315 unsigned long unused, 328 unsigned long unused,
316 struct task_struct * p, struct pt_regs * regs) 329 struct task_struct *p, struct pt_regs *regs)
317{ 330{
318 int err; 331 int err;
319 struct pt_regs * childregs; 332 struct pt_regs *childregs;
320 struct task_struct *me = current; 333 struct task_struct *me = current;
321 334
322 childregs = ((struct pt_regs *) 335 childregs = ((struct pt_regs *)
@@ -361,10 +374,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
361 if (test_thread_flag(TIF_IA32)) 374 if (test_thread_flag(TIF_IA32))
362 err = do_set_thread_area(p, -1, 375 err = do_set_thread_area(p, -1,
363 (struct user_desc __user *)childregs->si, 0); 376 (struct user_desc __user *)childregs->si, 0);
364 else 377 else
365#endif 378#endif
366 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 379 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
367 if (err) 380 if (err)
368 goto out; 381 goto out;
369 } 382 }
370 err = 0; 383 err = 0;
@@ -471,13 +484,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
471 next = &next_p->thread; 484 next = &next_p->thread;
472 485
473 debugctl = prev->debugctlmsr; 486 debugctl = prev->debugctlmsr;
474 if (next->ds_area_msr != prev->ds_area_msr) { 487
475 /* we clear debugctl to make sure DS 488#ifdef CONFIG_X86_DS
476 * is not in use when we change it */ 489 {
477 debugctl = 0; 490 unsigned long ds_prev = 0, ds_next = 0;
478 update_debugctlmsr(0); 491
479 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); 492 if (prev->ds_ctx)
493 ds_prev = (unsigned long)prev->ds_ctx->ds;
494 if (next->ds_ctx)
495 ds_next = (unsigned long)next->ds_ctx->ds;
496
497 if (ds_next != ds_prev) {
498 /*
499 * We clear debugctl to make sure DS
500 * is not in use when we change it:
501 */
502 debugctl = 0;
503 update_debugctlmsr(0);
504 wrmsrl(MSR_IA32_DS_AREA, ds_next);
505 }
480 } 506 }
507#endif /* CONFIG_X86_DS */
481 508
482 if (next->debugctlmsr != debugctl) 509 if (next->debugctlmsr != debugctl)
483 update_debugctlmsr(next->debugctlmsr); 510 update_debugctlmsr(next->debugctlmsr);
@@ -515,13 +542,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
515 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 542 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
516 } 543 }
517 544
518#ifdef X86_BTS 545#ifdef CONFIG_X86_PTRACE_BTS
519 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 546 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
520 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 547 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
521 548
522 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 549 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
523 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 550 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
524#endif 551#endif /* CONFIG_X86_PTRACE_BTS */
525} 552}
526 553
527/* 554/*
@@ -543,7 +570,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
543 unsigned fsindex, gsindex; 570 unsigned fsindex, gsindex;
544 571
545 /* we're going to use this soon, after a few expensive things */ 572 /* we're going to use this soon, after a few expensive things */
546 if (next_p->fpu_counter>5) 573 if (next_p->fpu_counter > 5)
547 prefetch(next->xstate); 574 prefetch(next->xstate);
548 575
549 /* 576 /*
@@ -551,13 +578,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
551 */ 578 */
552 load_sp0(tss, next); 579 load_sp0(tss, next);
553 580
554 /* 581 /*
555 * Switch DS and ES. 582 * Switch DS and ES.
556 * This won't pick up thread selector changes, but I guess that is ok. 583 * This won't pick up thread selector changes, but I guess that is ok.
557 */ 584 */
558 savesegment(es, prev->es); 585 savesegment(es, prev->es);
559 if (unlikely(next->es | prev->es)) 586 if (unlikely(next->es | prev->es))
560 loadsegment(es, next->es); 587 loadsegment(es, next->es);
561 588
562 savesegment(ds, prev->ds); 589 savesegment(ds, prev->ds);
563 if (unlikely(next->ds | prev->ds)) 590 if (unlikely(next->ds | prev->ds))
@@ -583,7 +610,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
583 */ 610 */
584 arch_leave_lazy_cpu_mode(); 611 arch_leave_lazy_cpu_mode();
585 612
586 /* 613 /*
587 * Switch FS and GS. 614 * Switch FS and GS.
588 * 615 *
589 * Segment register != 0 always requires a reload. Also 616 * Segment register != 0 always requires a reload. Also
@@ -592,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
592 */ 619 */
593 if (unlikely(fsindex | next->fsindex | prev->fs)) { 620 if (unlikely(fsindex | next->fsindex | prev->fs)) {
594 loadsegment(fs, next->fsindex); 621 loadsegment(fs, next->fsindex);
595 /* 622 /*
596 * Check if the user used a selector != 0; if yes 623 * Check if the user used a selector != 0; if yes
597 * clear 64bit base, since overloaded base is always 624 * clear 64bit base, since overloaded base is always
598 * mapped to the Null selector 625 * mapped to the Null selector
599 */ 626 */
600 if (fsindex) 627 if (fsindex)
601 prev->fs = 0; 628 prev->fs = 0;
602 } 629 }
603 /* when next process has a 64bit base use it */ 630 /* when next process has a 64bit base use it */
604 if (next->fs) 631 if (next->fs)
@@ -608,7 +635,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
608 if (unlikely(gsindex | next->gsindex | prev->gs)) { 635 if (unlikely(gsindex | next->gsindex | prev->gs)) {
609 load_gs_index(next->gsindex); 636 load_gs_index(next->gsindex);
610 if (gsindex) 637 if (gsindex)
611 prev->gs = 0; 638 prev->gs = 0;
612 } 639 }
613 if (next->gs) 640 if (next->gs)
614 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 641 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -617,12 +644,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
617 /* Must be after DS reload */ 644 /* Must be after DS reload */
618 unlazy_fpu(prev_p); 645 unlazy_fpu(prev_p);
619 646
620 /* 647 /*
621 * Switch the PDA and FPU contexts. 648 * Switch the PDA and FPU contexts.
622 */ 649 */
623 prev->usersp = read_pda(oldrsp); 650 prev->usersp = read_pda(oldrsp);
624 write_pda(oldrsp, next->usersp); 651 write_pda(oldrsp, next->usersp);
625 write_pda(pcurrent, next_p); 652 write_pda(pcurrent, next_p);
626 653
627 write_pda(kernelstack, 654 write_pda(kernelstack,
628 (unsigned long)task_stack_page(next_p) + 655 (unsigned long)task_stack_page(next_p) +
@@ -663,7 +690,7 @@ long sys_execve(char __user *name, char __user * __user *argv,
663 char __user * __user *envp, struct pt_regs *regs) 690 char __user * __user *envp, struct pt_regs *regs)
664{ 691{
665 long error; 692 long error;
666 char * filename; 693 char *filename;
667 694
668 filename = getname(name); 695 filename = getname(name);
669 error = PTR_ERR(filename); 696 error = PTR_ERR(filename);
@@ -721,55 +748,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs)
721unsigned long get_wchan(struct task_struct *p) 748unsigned long get_wchan(struct task_struct *p)
722{ 749{
723 unsigned long stack; 750 unsigned long stack;
724 u64 fp,ip; 751 u64 fp, ip;
725 int count = 0; 752 int count = 0;
726 753
727 if (!p || p == current || p->state==TASK_RUNNING) 754 if (!p || p == current || p->state == TASK_RUNNING)
728 return 0; 755 return 0;
729 stack = (unsigned long)task_stack_page(p); 756 stack = (unsigned long)task_stack_page(p);
730 if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) 757 if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
731 return 0; 758 return 0;
732 fp = *(u64 *)(p->thread.sp); 759 fp = *(u64 *)(p->thread.sp);
733 do { 760 do {
734 if (fp < (unsigned long)stack || 761 if (fp < (unsigned long)stack ||
735 fp > (unsigned long)stack+THREAD_SIZE) 762 fp > (unsigned long)stack+THREAD_SIZE)
736 return 0; 763 return 0;
737 ip = *(u64 *)(fp+8); 764 ip = *(u64 *)(fp+8);
738 if (!in_sched_functions(ip)) 765 if (!in_sched_functions(ip))
739 return ip; 766 return ip;
740 fp = *(u64 *)fp; 767 fp = *(u64 *)fp;
741 } while (count++ < 16); 768 } while (count++ < 16);
742 return 0; 769 return 0;
743} 770}
744 771
745long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) 772long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
746{ 773{
747 int ret = 0; 774 int ret = 0;
748 int doit = task == current; 775 int doit = task == current;
749 int cpu; 776 int cpu;
750 777
751 switch (code) { 778 switch (code) {
752 case ARCH_SET_GS: 779 case ARCH_SET_GS:
753 if (addr >= TASK_SIZE_OF(task)) 780 if (addr >= TASK_SIZE_OF(task))
754 return -EPERM; 781 return -EPERM;
755 cpu = get_cpu(); 782 cpu = get_cpu();
756 /* handle small bases via the GDT because that's faster to 783 /* handle small bases via the GDT because that's faster to
757 switch. */ 784 switch. */
758 if (addr <= 0xffffffff) { 785 if (addr <= 0xffffffff) {
759 set_32bit_tls(task, GS_TLS, addr); 786 set_32bit_tls(task, GS_TLS, addr);
760 if (doit) { 787 if (doit) {
761 load_TLS(&task->thread, cpu); 788 load_TLS(&task->thread, cpu);
762 load_gs_index(GS_TLS_SEL); 789 load_gs_index(GS_TLS_SEL);
763 } 790 }
764 task->thread.gsindex = GS_TLS_SEL; 791 task->thread.gsindex = GS_TLS_SEL;
765 task->thread.gs = 0; 792 task->thread.gs = 0;
766 } else { 793 } else {
767 task->thread.gsindex = 0; 794 task->thread.gsindex = 0;
768 task->thread.gs = addr; 795 task->thread.gs = addr;
769 if (doit) { 796 if (doit) {
770 load_gs_index(0); 797 load_gs_index(0);
771 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 798 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
772 } 799 }
773 } 800 }
774 put_cpu(); 801 put_cpu();
775 break; 802 break;
@@ -823,8 +850,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
823 rdmsrl(MSR_KERNEL_GS_BASE, base); 850 rdmsrl(MSR_KERNEL_GS_BASE, base);
824 else 851 else
825 base = task->thread.gs; 852 base = task->thread.gs;
826 } 853 } else
827 else
828 base = task->thread.gs; 854 base = task->thread.gs;
829 ret = put_user(base, (unsigned long __user *)addr); 855 ret = put_user(base, (unsigned long __user *)addr);
830 break; 856 break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e37dccce85db..e375b658efc3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -14,6 +14,7 @@
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include <linux/ptrace.h> 15#include <linux/ptrace.h>
16#include <linux/regset.h> 16#include <linux/regset.h>
17#include <linux/tracehook.h>
17#include <linux/user.h> 18#include <linux/user.h>
18#include <linux/elf.h> 19#include <linux/elf.h>
19#include <linux/security.h> 20#include <linux/security.h>
@@ -69,7 +70,7 @@ static inline bool invalid_selector(u16 value)
69 70
70#define FLAG_MASK FLAG_MASK_32 71#define FLAG_MASK FLAG_MASK_32
71 72
72static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 73static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
73{ 74{
74 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 75 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
75 regno >>= 2; 76 regno >>= 2;
@@ -554,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
554 return 0; 555 return 0;
555} 556}
556 557
557#ifdef X86_BTS 558#ifdef CONFIG_X86_PTRACE_BTS
559/*
560 * The configuration for a particular BTS hardware implementation.
561 */
562struct bts_configuration {
563 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
564 unsigned char sizeof_bts;
565 /* the size of a field in the BTS record in bytes */
566 unsigned char sizeof_field;
567 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
568 unsigned long debugctl_mask;
569};
570static struct bts_configuration bts_cfg;
571
572#define BTS_MAX_RECORD_SIZE (8 * 3)
573
574
575/*
576 * Branch Trace Store (BTS) uses the following format. Different
577 * architectures vary in the size of those fields.
578 * - source linear address
579 * - destination linear address
580 * - flags
581 *
582 * Later architectures use 64bit pointers throughout, whereas earlier
583 * architectures use 32bit pointers in 32bit mode.
584 *
585 * We compute the base address for the first 8 fields based on:
586 * - the field size stored in the DS configuration
587 * - the relative field position
588 *
589 * In order to store additional information in the BTS buffer, we use
590 * a special source address to indicate that the record requires
591 * special interpretation.
592 *
593 * Netburst indicated via a bit in the flags field whether the branch
594 * was predicted; this is ignored.
595 */
596
597enum bts_field {
598 bts_from = 0,
599 bts_to,
600 bts_flags,
601
602 bts_escape = (unsigned long)-1,
603 bts_qual = bts_to,
604 bts_jiffies = bts_flags
605};
606
607static inline unsigned long bts_get(const char *base, enum bts_field field)
608{
609 base += (bts_cfg.sizeof_field * field);
610 return *(unsigned long *)base;
611}
558 612
559static int ptrace_bts_get_size(struct task_struct *child) 613static inline void bts_set(char *base, enum bts_field field, unsigned long val)
560{ 614{
561 if (!child->thread.ds_area_msr) 615 base += (bts_cfg.sizeof_field * field);;
562 return -ENXIO; 616 (*(unsigned long *)base) = val;
617}
563 618
564 return ds_get_bts_index((void *)child->thread.ds_area_msr); 619/*
620 * Translate a BTS record from the raw format into the bts_struct format
621 *
622 * out (out): bts_struct interpretation
623 * raw: raw BTS record
624 */
625static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
626{
627 memset(out, 0, sizeof(*out));
628 if (bts_get(raw, bts_from) == bts_escape) {
629 out->qualifier = bts_get(raw, bts_qual);
630 out->variant.jiffies = bts_get(raw, bts_jiffies);
631 } else {
632 out->qualifier = BTS_BRANCH;
633 out->variant.lbr.from_ip = bts_get(raw, bts_from);
634 out->variant.lbr.to_ip = bts_get(raw, bts_to);
635 }
565} 636}
566 637
567static int ptrace_bts_read_record(struct task_struct *child, 638static int ptrace_bts_read_record(struct task_struct *child, size_t index,
568 long index,
569 struct bts_struct __user *out) 639 struct bts_struct __user *out)
570{ 640{
571 struct bts_struct ret; 641 struct bts_struct ret;
572 int retval; 642 const void *bts_record;
573 int bts_end; 643 size_t bts_index, bts_end;
574 int bts_index; 644 int error;
575
576 if (!child->thread.ds_area_msr)
577 return -ENXIO;
578 645
579 if (index < 0) 646 error = ds_get_bts_end(child, &bts_end);
580 return -EINVAL; 647 if (error < 0)
648 return error;
581 649
582 bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
583 if (bts_end <= index) 650 if (bts_end <= index)
584 return -EINVAL; 651 return -EINVAL;
585 652
653 error = ds_get_bts_index(child, &bts_index);
654 if (error < 0)
655 return error;
656
586 /* translate the ptrace bts index into the ds bts index */ 657 /* translate the ptrace bts index into the ds bts index */
587 bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); 658 bts_index += bts_end - (index + 1);
588 bts_index -= (index + 1); 659 if (bts_end <= bts_index)
589 if (bts_index < 0) 660 bts_index -= bts_end;
590 bts_index += bts_end;
591 661
592 retval = ds_read_bts((void *)child->thread.ds_area_msr, 662 error = ds_access_bts(child, bts_index, &bts_record);
593 bts_index, &ret); 663 if (error < 0)
594 if (retval < 0) 664 return error;
595 return retval; 665
666 ptrace_bts_translate_record(&ret, bts_record);
596 667
597 if (copy_to_user(out, &ret, sizeof(ret))) 668 if (copy_to_user(out, &ret, sizeof(ret)))
598 return -EFAULT; 669 return -EFAULT;
@@ -600,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
600 return sizeof(ret); 671 return sizeof(ret);
601} 672}
602 673
603static int ptrace_bts_clear(struct task_struct *child)
604{
605 if (!child->thread.ds_area_msr)
606 return -ENXIO;
607
608 return ds_clear((void *)child->thread.ds_area_msr);
609}
610
611static int ptrace_bts_drain(struct task_struct *child, 674static int ptrace_bts_drain(struct task_struct *child,
612 long size, 675 long size,
613 struct bts_struct __user *out) 676 struct bts_struct __user *out)
614{ 677{
615 int end, i; 678 struct bts_struct ret;
616 void *ds = (void *)child->thread.ds_area_msr; 679 const unsigned char *raw;
617 680 size_t end, i;
618 if (!ds) 681 int error;
619 return -ENXIO;
620 682
621 end = ds_get_bts_index(ds); 683 error = ds_get_bts_index(child, &end);
622 if (end <= 0) 684 if (error < 0)
623 return end; 685 return error;
624 686
625 if (size < (end * sizeof(struct bts_struct))) 687 if (size < (end * sizeof(struct bts_struct)))
626 return -EIO; 688 return -EIO;
627 689
628 for (i = 0; i < end; i++, out++) { 690 error = ds_access_bts(child, 0, (const void **)&raw);
629 struct bts_struct ret; 691 if (error < 0)
630 int retval; 692 return error;
631 693
632 retval = ds_read_bts(ds, i, &ret); 694 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
633 if (retval < 0) 695 ptrace_bts_translate_record(&ret, raw);
634 return retval;
635 696
636 if (copy_to_user(out, &ret, sizeof(ret))) 697 if (copy_to_user(out, &ret, sizeof(ret)))
637 return -EFAULT; 698 return -EFAULT;
638 } 699 }
639 700
640 ds_clear(ds); 701 error = ds_clear_bts(child);
702 if (error < 0)
703 return error;
641 704
642 return end; 705 return end;
643} 706}
644 707
708static void ptrace_bts_ovfl(struct task_struct *child)
709{
710 send_sig(child->thread.bts_ovfl_signal, child, 0);
711}
712
645static int ptrace_bts_config(struct task_struct *child, 713static int ptrace_bts_config(struct task_struct *child,
646 long cfg_size, 714 long cfg_size,
647 const struct ptrace_bts_config __user *ucfg) 715 const struct ptrace_bts_config __user *ucfg)
648{ 716{
649 struct ptrace_bts_config cfg; 717 struct ptrace_bts_config cfg;
650 int bts_size, ret = 0; 718 int error = 0;
651 void *ds; 719
720 error = -EOPNOTSUPP;
721 if (!bts_cfg.sizeof_bts)
722 goto errout;
652 723
724 error = -EIO;
653 if (cfg_size < sizeof(cfg)) 725 if (cfg_size < sizeof(cfg))
654 return -EIO; 726 goto errout;
655 727
728 error = -EFAULT;
656 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 729 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
657 return -EFAULT; 730 goto errout;
658 731
659 if ((int)cfg.size < 0) 732 error = -EINVAL;
660 return -EINVAL; 733 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
734 !(cfg.flags & PTRACE_BTS_O_ALLOC))
735 goto errout;
661 736
662 bts_size = 0; 737 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
663 ds = (void *)child->thread.ds_area_msr; 738 ds_ovfl_callback_t ovfl = NULL;
664 if (ds) { 739 unsigned int sig = 0;
665 bts_size = ds_get_bts_size(ds); 740
666 if (bts_size < 0) 741 /* we ignore the error in case we were not tracing child */
667 return bts_size; 742 (void)ds_release_bts(child);
668 }
669 cfg.size = PAGE_ALIGN(cfg.size);
670 743
671 if (bts_size != cfg.size) { 744 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
672 ret = ptrace_bts_realloc(child, cfg.size, 745 if (!cfg.signal)
673 cfg.flags & PTRACE_BTS_O_CUT_SIZE); 746 goto errout;
674 if (ret < 0) 747
748 sig = cfg.signal;
749 ovfl = ptrace_bts_ovfl;
750 }
751
752 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
753 if (error < 0)
675 goto errout; 754 goto errout;
676 755
677 ds = (void *)child->thread.ds_area_msr; 756 child->thread.bts_ovfl_signal = sig;
678 } 757 }
679 758
680 if (cfg.flags & PTRACE_BTS_O_SIGNAL) 759 error = -EINVAL;
681 ret = ds_set_overflow(ds, DS_O_SIGNAL); 760 if (!child->thread.ds_ctx && cfg.flags)
682 else
683 ret = ds_set_overflow(ds, DS_O_WRAP);
684 if (ret < 0)
685 goto errout; 761 goto errout;
686 762
687 if (cfg.flags & PTRACE_BTS_O_TRACE) 763 if (cfg.flags & PTRACE_BTS_O_TRACE)
688 child->thread.debugctlmsr |= ds_debugctl_mask(); 764 child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
689 else 765 else
690 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 766 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
691 767
692 if (cfg.flags & PTRACE_BTS_O_SCHED) 768 if (cfg.flags & PTRACE_BTS_O_SCHED)
693 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 769 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
694 else 770 else
695 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 771 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
696 772
697 ret = sizeof(cfg); 773 error = sizeof(cfg);
698 774
699out: 775out:
700 if (child->thread.debugctlmsr) 776 if (child->thread.debugctlmsr)
@@ -702,10 +778,10 @@ out:
702 else 778 else
703 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 779 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
704 780
705 return ret; 781 return error;
706 782
707errout: 783errout:
708 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 784 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
709 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 785 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
710 goto out; 786 goto out;
711} 787}
@@ -714,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child,
714 long cfg_size, 790 long cfg_size,
715 struct ptrace_bts_config __user *ucfg) 791 struct ptrace_bts_config __user *ucfg)
716{ 792{
717 void *ds = (void *)child->thread.ds_area_msr;
718 struct ptrace_bts_config cfg; 793 struct ptrace_bts_config cfg;
794 size_t end;
795 const void *base, *max;
796 int error;
719 797
720 if (cfg_size < sizeof(cfg)) 798 if (cfg_size < sizeof(cfg))
721 return -EIO; 799 return -EIO;
722 800
723 memset(&cfg, 0, sizeof(cfg)); 801 error = ds_get_bts_end(child, &end);
802 if (error < 0)
803 return error;
724 804
725 if (ds) { 805 error = ds_access_bts(child, /* index = */ 0, &base);
726 cfg.size = ds_get_bts_size(ds); 806 if (error < 0)
807 return error;
727 808
728 if (ds_get_overflow(ds) == DS_O_SIGNAL) 809 error = ds_access_bts(child, /* index = */ end, &max);
729 cfg.flags |= PTRACE_BTS_O_SIGNAL; 810 if (error < 0)
811 return error;
730 812
731 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 813 memset(&cfg, 0, sizeof(cfg));
732 child->thread.debugctlmsr & ds_debugctl_mask()) 814 cfg.size = (max - base);
733 cfg.flags |= PTRACE_BTS_O_TRACE; 815 cfg.signal = child->thread.bts_ovfl_signal;
816 cfg.bts_size = sizeof(struct bts_struct);
734 817
735 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 818 if (cfg.signal)
736 cfg.flags |= PTRACE_BTS_O_SCHED; 819 cfg.flags |= PTRACE_BTS_O_SIGNAL;
737 }
738 820
739 cfg.bts_size = sizeof(struct bts_struct); 821 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
822 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
823 cfg.flags |= PTRACE_BTS_O_TRACE;
824
825 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
826 cfg.flags |= PTRACE_BTS_O_SCHED;
740 827
741 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 828 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
742 return -EFAULT; 829 return -EFAULT;
@@ -744,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child,
744 return sizeof(cfg); 831 return sizeof(cfg);
745} 832}
746 833
747
748static int ptrace_bts_write_record(struct task_struct *child, 834static int ptrace_bts_write_record(struct task_struct *child,
749 const struct bts_struct *in) 835 const struct bts_struct *in)
750{ 836{
751 int retval; 837 unsigned char bts_record[BTS_MAX_RECORD_SIZE];
752 838
753 if (!child->thread.ds_area_msr) 839 BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
754 return -ENXIO;
755 840
756 retval = ds_write_bts((void *)child->thread.ds_area_msr, in); 841 memset(bts_record, 0, bts_cfg.sizeof_bts);
757 if (retval) 842 switch (in->qualifier) {
758 return retval; 843 case BTS_INVALID:
844 break;
759 845
760 return sizeof(*in); 846 case BTS_BRANCH:
761} 847 bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
848 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
849 break;
762 850
763static int ptrace_bts_realloc(struct task_struct *child, 851 case BTS_TASK_ARRIVES:
764 int size, int reduce_size) 852 case BTS_TASK_DEPARTS:
765{ 853 bts_set(bts_record, bts_from, bts_escape);
766 unsigned long rlim, vm; 854 bts_set(bts_record, bts_qual, in->qualifier);
767 int ret, old_size; 855 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
856 break;
768 857
769 if (size < 0) 858 default:
770 return -EINVAL; 859 return -EINVAL;
771
772 old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
773 if (old_size < 0)
774 return old_size;
775
776 ret = ds_free((void **)&child->thread.ds_area_msr);
777 if (ret < 0)
778 goto out;
779
780 size >>= PAGE_SHIFT;
781 old_size >>= PAGE_SHIFT;
782
783 current->mm->total_vm -= old_size;
784 current->mm->locked_vm -= old_size;
785
786 if (size == 0)
787 goto out;
788
789 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
790 vm = current->mm->total_vm + size;
791 if (rlim < vm) {
792 ret = -ENOMEM;
793
794 if (!reduce_size)
795 goto out;
796
797 size = rlim - current->mm->total_vm;
798 if (size <= 0)
799 goto out;
800 }
801
802 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
803 vm = current->mm->locked_vm + size;
804 if (rlim < vm) {
805 ret = -ENOMEM;
806
807 if (!reduce_size)
808 goto out;
809
810 size = rlim - current->mm->locked_vm;
811 if (size <= 0)
812 goto out;
813 } 860 }
814 861
815 ret = ds_allocate((void **)&child->thread.ds_area_msr, 862 /* The writing task will be the switched-to task on a context
816 size << PAGE_SHIFT); 863 * switch. It needs to write into the switched-from task's BTS
817 if (ret < 0) 864 * buffer. */
818 goto out; 865 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
819
820 current->mm->total_vm += size;
821 current->mm->locked_vm += size;
822
823out:
824 if (child->thread.ds_area_msr)
825 set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
826 else
827 clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
828
829 return ret;
830} 866}
831 867
832void ptrace_bts_take_timestamp(struct task_struct *tsk, 868void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
839 875
840 ptrace_bts_write_record(tsk, &rec); 876 ptrace_bts_write_record(tsk, &rec);
841} 877}
842#endif /* X86_BTS */ 878
879static const struct bts_configuration bts_cfg_netburst = {
880 .sizeof_bts = sizeof(long) * 3,
881 .sizeof_field = sizeof(long),
882 .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
883};
884
885static const struct bts_configuration bts_cfg_pentium_m = {
886 .sizeof_bts = sizeof(long) * 3,
887 .sizeof_field = sizeof(long),
888 .debugctl_mask = (1<<6)|(1<<7)
889};
890
891static const struct bts_configuration bts_cfg_core2 = {
892 .sizeof_bts = 8 * 3,
893 .sizeof_field = 8,
894 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
895};
896
897static inline void bts_configure(const struct bts_configuration *cfg)
898{
899 bts_cfg = *cfg;
900}
901
902void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
903{
904 switch (c->x86) {
905 case 0x6:
906 switch (c->x86_model) {
907 case 0xD:
908 case 0xE: /* Pentium M */
909 bts_configure(&bts_cfg_pentium_m);
910 break;
911 case 0xF: /* Core2 */
912 case 0x1C: /* Atom */
913 bts_configure(&bts_cfg_core2);
914 break;
915 default:
916 /* sorry, don't know about them */
917 break;
918 }
919 break;
920 case 0xF:
921 switch (c->x86_model) {
922 case 0x0:
923 case 0x1:
924 case 0x2: /* Netburst */
925 bts_configure(&bts_cfg_netburst);
926 break;
927 default:
928 /* sorry, don't know about them */
929 break;
930 }
931 break;
932 default:
933 /* sorry, don't know about them */
934 break;
935 }
936}
937#endif /* CONFIG_X86_PTRACE_BTS */
843 938
844/* 939/*
845 * Called by kernel/ptrace.c when detaching.. 940 * Called by kernel/ptrace.c when detaching..
@@ -852,15 +947,15 @@ void ptrace_disable(struct task_struct *child)
852#ifdef TIF_SYSCALL_EMU 947#ifdef TIF_SYSCALL_EMU
853 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 948 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
854#endif 949#endif
855 if (child->thread.ds_area_msr) { 950#ifdef CONFIG_X86_PTRACE_BTS
856#ifdef X86_BTS 951 (void)ds_release_bts(child);
857 ptrace_bts_realloc(child, 0, 0); 952
858#endif 953 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
859 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 954 if (!child->thread.debugctlmsr)
860 if (!child->thread.debugctlmsr) 955 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
861 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 956
862 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 957 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
863 } 958#endif /* CONFIG_X86_PTRACE_BTS */
864} 959}
865 960
866#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 961#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
980 /* 1075 /*
981 * These bits need more cooking - not enabled yet: 1076 * These bits need more cooking - not enabled yet:
982 */ 1077 */
983#ifdef X86_BTS 1078#ifdef CONFIG_X86_PTRACE_BTS
984 case PTRACE_BTS_CONFIG: 1079 case PTRACE_BTS_CONFIG:
985 ret = ptrace_bts_config 1080 ret = ptrace_bts_config
986 (child, data, (struct ptrace_bts_config __user *)addr); 1081 (child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
992 break; 1087 break;
993 1088
994 case PTRACE_BTS_SIZE: 1089 case PTRACE_BTS_SIZE:
995 ret = ptrace_bts_get_size(child); 1090 ret = ds_get_bts_index(child, /* pos = */ NULL);
996 break; 1091 break;
997 1092
998 case PTRACE_BTS_GET: 1093 case PTRACE_BTS_GET:
@@ -1001,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1001 break; 1096 break;
1002 1097
1003 case PTRACE_BTS_CLEAR: 1098 case PTRACE_BTS_CLEAR:
1004 ret = ptrace_bts_clear(child); 1099 ret = ds_clear_bts(child);
1005 break; 1100 break;
1006 1101
1007 case PTRACE_BTS_DRAIN: 1102 case PTRACE_BTS_DRAIN:
1008 ret = ptrace_bts_drain 1103 ret = ptrace_bts_drain
1009 (child, data, (struct bts_struct __user *) addr); 1104 (child, data, (struct bts_struct __user *) addr);
1010 break; 1105 break;
1011#endif 1106#endif /* CONFIG_X86_PTRACE_BTS */
1012 1107
1013 default: 1108 default:
1014 ret = ptrace_request(child, request, addr, data); 1109 ret = ptrace_request(child, request, addr, data);
@@ -1375,30 +1470,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
1375 force_sig_info(SIGTRAP, &info, tsk); 1470 force_sig_info(SIGTRAP, &info, tsk);
1376} 1471}
1377 1472
1378static void syscall_trace(struct pt_regs *regs)
1379{
1380 if (!(current->ptrace & PT_PTRACED))
1381 return;
1382
1383#if 0
1384 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
1385 current->comm,
1386 regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
1387 current_thread_info()->flags, current->ptrace);
1388#endif
1389
1390 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
1391 ? 0x80 : 0));
1392 /*
1393 * this isn't the same as continuing with a signal, but it will do
1394 * for normal use. strace only continues with a signal if the
1395 * stopping signal is not SIGTRAP. -brl
1396 */
1397 if (current->exit_code) {
1398 send_sig(current->exit_code, current, 1);
1399 current->exit_code = 0;
1400 }
1401}
1402 1473
1403#ifdef CONFIG_X86_32 1474#ifdef CONFIG_X86_32
1404# define IS_IA32 1 1475# define IS_IA32 1
@@ -1432,8 +1503,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
1432 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) 1503 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1433 ret = -1L; 1504 ret = -1L;
1434 1505
1435 if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) 1506 if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
1436 syscall_trace(regs); 1507 tracehook_report_syscall_entry(regs))
1508 ret = -1L;
1437 1509
1438 if (unlikely(current->audit_context)) { 1510 if (unlikely(current->audit_context)) {
1439 if (IS_IA32) 1511 if (IS_IA32)
@@ -1459,7 +1531,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
1459 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); 1531 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1460 1532
1461 if (test_thread_flag(TIF_SYSCALL_TRACE)) 1533 if (test_thread_flag(TIF_SYSCALL_TRACE))
1462 syscall_trace(regs); 1534 tracehook_report_syscall_exit(regs, 0);
1463 1535
1464 /* 1536 /*
1465 * If TIF_SYSCALL_EMU is set, we only get here because of 1537 * If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1547,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
1475 * system call instruction. 1547 * system call instruction.
1476 */ 1548 */
1477 if (test_thread_flag(TIF_SINGLESTEP) && 1549 if (test_thread_flag(TIF_SINGLESTEP) &&
1478 (current->ptrace & PT_PTRACED)) 1550 tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
1479 send_sigtrap(current, regs, 0); 1551 send_sigtrap(current, regs, 0);
1480} 1552}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 724adfc63cb9..f4c93f1cfc19 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off);
29 29
30static const struct desc_ptr no_idt = {}; 30static const struct desc_ptr no_idt = {};
31static int reboot_mode; 31static int reboot_mode;
32enum reboot_type reboot_type = BOOT_KBD; 32/*
33 * Keyboard reset and triple fault may result in INIT, not RESET, which
34 * doesn't work when we're in vmx root mode. Try ACPI first.
35 */
36enum reboot_type reboot_type = BOOT_ACPI;
33int reboot_force; 37int reboot_force;
34 38
35#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) 39#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 362d4e7f2d38..141efab52400 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -223,6 +223,9 @@ unsigned long saved_video_mode;
223#define RAMDISK_LOAD_FLAG 0x4000 223#define RAMDISK_LOAD_FLAG 0x4000
224 224
225static char __initdata command_line[COMMAND_LINE_SIZE]; 225static char __initdata command_line[COMMAND_LINE_SIZE];
226#ifdef CONFIG_CMDLINE_BOOL
227static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
228#endif
226 229
227#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) 230#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
228struct edd edd; 231struct edd edd;
@@ -665,11 +668,28 @@ void __init setup_arch(char **cmdline_p)
665 bss_resource.start = virt_to_phys(&__bss_start); 668 bss_resource.start = virt_to_phys(&__bss_start);
666 bss_resource.end = virt_to_phys(&__bss_stop)-1; 669 bss_resource.end = virt_to_phys(&__bss_stop)-1;
667 670
671#ifdef CONFIG_CMDLINE_BOOL
672#ifdef CONFIG_CMDLINE_OVERRIDE
673 strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
674#else
675 if (builtin_cmdline[0]) {
676 /* append boot loader cmdline to builtin */
677 strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
678 strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
679 strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
680 }
681#endif
682#endif
683
668 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); 684 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
669 *cmdline_p = command_line; 685 *cmdline_p = command_line;
670 686
671 parse_early_param(); 687 parse_early_param();
672 688
689#ifdef CONFIG_X86_64
690 check_efer();
691#endif
692
673#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) 693#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
674 /* 694 /*
675 * Must be before kernel pagetables are setup 695 * Must be before kernel pagetables are setup
@@ -738,7 +758,6 @@ void __init setup_arch(char **cmdline_p)
738#else 758#else
739 num_physpages = max_pfn; 759 num_physpages = max_pfn;
740 760
741 check_efer();
742 761
743 /* How many end-of-memory variables you have, grandma! */ 762 /* How many end-of-memory variables you have, grandma! */
744 /* need this before calling reserve_initrd */ 763 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 76e305e064f9..0e67f72d9316 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -162,9 +162,16 @@ void __init setup_per_cpu_areas(void)
162 printk(KERN_INFO 162 printk(KERN_INFO
163 "cpu %d has no node %d or node-local memory\n", 163 "cpu %d has no node %d or node-local memory\n",
164 cpu, node); 164 cpu, node);
165 if (ptr)
166 printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
167 cpu, __pa(ptr));
165 } 168 }
166 else 169 else {
167 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); 170 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
171 if (ptr)
172 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
173 cpu, node, __pa(ptr));
174 }
168#endif 175#endif
169 per_cpu_offset(cpu) = ptr - __per_cpu_start; 176 per_cpu_offset(cpu) = ptr - __per_cpu_start;
170 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 177 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb519d2dc..8b4956e800ac 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -24,4 +24,9 @@ struct rt_sigframe {
24 struct ucontext uc; 24 struct ucontext uc;
25 struct siginfo info; 25 struct siginfo info;
26}; 26};
27
28int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
29 sigset_t *set, struct pt_regs *regs);
30int ia32_setup_frame(int sig, struct k_sigaction *ka,
31 sigset_t *set, struct pt_regs *regs);
27#endif 32#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6fb5bcdd8933..2a2435d3037d 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -17,6 +17,7 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/wait.h> 19#include <linux/wait.h>
20#include <linux/tracehook.h>
20#include <linux/elf.h> 21#include <linux/elf.h>
21#include <linux/smp.h> 22#include <linux/smp.h>
22#include <linux/mm.h> 23#include <linux/mm.h>
@@ -26,6 +27,7 @@
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27#include <asm/i387.h> 28#include <asm/i387.h>
28#include <asm/vdso.h> 29#include <asm/vdso.h>
30#include <asm/syscalls.h>
29 31
30#include "sigframe.h" 32#include "sigframe.h"
31 33
@@ -558,8 +560,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
558 * handler too. 560 * handler too.
559 */ 561 */
560 regs->flags &= ~X86_EFLAGS_TF; 562 regs->flags &= ~X86_EFLAGS_TF;
561 if (test_thread_flag(TIF_SINGLESTEP))
562 ptrace_notify(SIGTRAP);
563 563
564 spin_lock_irq(&current->sighand->siglock); 564 spin_lock_irq(&current->sighand->siglock);
565 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); 565 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -568,6 +568,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
568 recalc_sigpending(); 568 recalc_sigpending();
569 spin_unlock_irq(&current->sighand->siglock); 569 spin_unlock_irq(&current->sighand->siglock);
570 570
571 tracehook_signal_handler(sig, info, ka, regs,
572 test_thread_flag(TIF_SINGLESTEP));
573
571 return 0; 574 return 0;
572} 575}
573 576
@@ -661,5 +664,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
661 if (thread_info_flags & _TIF_SIGPENDING) 664 if (thread_info_flags & _TIF_SIGPENDING)
662 do_signal(regs); 665 do_signal(regs);
663 666
667 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
668 clear_thread_flag(TIF_NOTIFY_RESUME);
669 tracehook_notify_resume(regs);
670 }
671
664 clear_thread_flag(TIF_IRET); 672 clear_thread_flag(TIF_IRET);
665} 673}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ca316b5b742c..694aa888bb19 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -15,17 +15,21 @@
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/ptrace.h> 17#include <linux/ptrace.h>
18#include <linux/tracehook.h>
18#include <linux/unistd.h> 19#include <linux/unistd.h>
19#include <linux/stddef.h> 20#include <linux/stddef.h>
20#include <linux/personality.h> 21#include <linux/personality.h>
21#include <linux/compiler.h> 22#include <linux/compiler.h>
23#include <linux/uaccess.h>
24
22#include <asm/processor.h> 25#include <asm/processor.h>
23#include <asm/ucontext.h> 26#include <asm/ucontext.h>
24#include <asm/uaccess.h>
25#include <asm/i387.h> 27#include <asm/i387.h>
26#include <asm/proto.h> 28#include <asm/proto.h>
27#include <asm/ia32_unistd.h> 29#include <asm/ia32_unistd.h>
28#include <asm/mce.h> 30#include <asm/mce.h>
31#include <asm/syscall.h>
32#include <asm/syscalls.h>
29#include "sigframe.h" 33#include "sigframe.h"
30 34
31#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 35#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,11 +45,6 @@
41# define FIX_EFLAGS __FIX_EFLAGS 45# define FIX_EFLAGS __FIX_EFLAGS
42#endif 46#endif
43 47
44int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
45 sigset_t *set, struct pt_regs * regs);
46int ia32_setup_frame(int sig, struct k_sigaction *ka,
47 sigset_t *set, struct pt_regs * regs);
48
49asmlinkage long 48asmlinkage long
50sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, 49sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
51 struct pt_regs *regs) 50 struct pt_regs *regs)
@@ -128,7 +127,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
128 /* Always make any pending restarted system calls return -EINTR */ 127 /* Always make any pending restarted system calls return -EINTR */
129 current_thread_info()->restart_block.fn = do_no_restart_syscall; 128 current_thread_info()->restart_block.fn = do_no_restart_syscall;
130 129
131#define COPY(x) err |= __get_user(regs->x, &sc->x) 130#define COPY(x) (err |= __get_user(regs->x, &sc->x))
132 131
133 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); 132 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
134 COPY(dx); COPY(cx); COPY(ip); 133 COPY(dx); COPY(cx); COPY(ip);
@@ -158,7 +157,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
158 } 157 }
159 158
160 { 159 {
161 struct _fpstate __user * buf; 160 struct _fpstate __user *buf;
162 err |= __get_user(buf, &sc->fpstate); 161 err |= __get_user(buf, &sc->fpstate);
163 162
164 if (buf) { 163 if (buf) {
@@ -198,7 +197,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
198 current->blocked = set; 197 current->blocked = set;
199 recalc_sigpending(); 198 recalc_sigpending();
200 spin_unlock_irq(&current->sighand->siglock); 199 spin_unlock_irq(&current->sighand->siglock);
201 200
202 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) 201 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
203 goto badframe; 202 goto badframe;
204 203
@@ -208,16 +207,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
208 return ax; 207 return ax;
209 208
210badframe: 209badframe:
211 signal_fault(regs,frame,"sigreturn"); 210 signal_fault(regs, frame, "sigreturn");
212 return 0; 211 return 0;
213} 212}
214 213
215/* 214/*
216 * Set up a signal frame. 215 * Set up a signal frame.
217 */ 216 */
218 217
219static inline int 218static inline int
220setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) 219setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
220 unsigned long mask, struct task_struct *me)
221{ 221{
222 int err = 0; 222 int err = 0;
223 223
@@ -273,35 +273,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
273} 273}
274 274
275static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 275static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
276 sigset_t *set, struct pt_regs * regs) 276 sigset_t *set, struct pt_regs *regs)
277{ 277{
278 struct rt_sigframe __user *frame; 278 struct rt_sigframe __user *frame;
279 struct _fpstate __user *fp = NULL; 279 struct _fpstate __user *fp = NULL;
280 int err = 0; 280 int err = 0;
281 struct task_struct *me = current; 281 struct task_struct *me = current;
282 282
283 if (used_math()) { 283 if (used_math()) {
284 fp = get_stack(ka, regs, sizeof(struct _fpstate)); 284 fp = get_stack(ka, regs, sizeof(struct _fpstate));
285 frame = (void __user *)round_down( 285 frame = (void __user *)round_down(
286 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; 286 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
287 287
288 if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) 288 if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
289 goto give_sigsegv; 289 goto give_sigsegv;
290 290
291 if (save_i387(fp) < 0) 291 if (save_i387(fp) < 0)
292 err |= -1; 292 err |= -1;
293 } else 293 } else
294 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; 294 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
295 295
296 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 296 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
297 goto give_sigsegv; 297 goto give_sigsegv;
298 298
299 if (ka->sa.sa_flags & SA_SIGINFO) { 299 if (ka->sa.sa_flags & SA_SIGINFO) {
300 err |= copy_siginfo_to_user(&frame->info, info); 300 err |= copy_siginfo_to_user(&frame->info, info);
301 if (err) 301 if (err)
302 goto give_sigsegv; 302 goto give_sigsegv;
303 } 303 }
304 304
305 /* Create the ucontext. */ 305 /* Create the ucontext. */
306 err |= __put_user(0, &frame->uc.uc_flags); 306 err |= __put_user(0, &frame->uc.uc_flags);
307 err |= __put_user(0, &frame->uc.uc_link); 307 err |= __put_user(0, &frame->uc.uc_link);
@@ -311,9 +311,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
311 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); 311 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
312 err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); 312 err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
313 err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); 313 err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
314 if (sizeof(*set) == 16) { 314 if (sizeof(*set) == 16) {
315 __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); 315 __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
316 __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); 316 __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
317 } else 317 } else
318 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 318 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
319 319
@@ -324,7 +324,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
324 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); 324 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
325 } else { 325 } else {
326 /* could use a vstub here */ 326 /* could use a vstub here */
327 goto give_sigsegv; 327 goto give_sigsegv;
328 } 328 }
329 329
330 if (err) 330 if (err)
@@ -332,7 +332,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
332 332
333 /* Set up registers for signal handler */ 333 /* Set up registers for signal handler */
334 regs->di = sig; 334 regs->di = sig;
335 /* In case the signal handler was declared without prototypes */ 335 /* In case the signal handler was declared without prototypes */
336 regs->ax = 0; 336 regs->ax = 0;
337 337
338 /* This also works for non SA_SIGINFO handlers because they expect the 338 /* This also works for non SA_SIGINFO handlers because they expect the
@@ -355,37 +355,8 @@ give_sigsegv:
355} 355}
356 356
357/* 357/*
358 * Return -1L or the syscall number that @regs is executing.
359 */
360static long current_syscall(struct pt_regs *regs)
361{
362 /*
363 * We always sign-extend a -1 value being set here,
364 * so this is always either -1L or a syscall number.
365 */
366 return regs->orig_ax;
367}
368
369/*
370 * Return a value that is -EFOO if the system call in @regs->orig_ax
371 * returned an error. This only works for @regs from @current.
372 */
373static long current_syscall_ret(struct pt_regs *regs)
374{
375#ifdef CONFIG_IA32_EMULATION
376 if (test_thread_flag(TIF_IA32))
377 /*
378 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
379 * and will match correctly in comparisons.
380 */
381 return (int) regs->ax;
382#endif
383 return regs->ax;
384}
385
386/*
387 * OK, we're invoking a handler 358 * OK, we're invoking a handler
388 */ 359 */
389 360
390static int 361static int
391handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, 362handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -394,9 +365,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
394 int ret; 365 int ret;
395 366
396 /* Are we from a system call? */ 367 /* Are we from a system call? */
397 if (current_syscall(regs) >= 0) { 368 if (syscall_get_nr(current, regs) >= 0) {
398 /* If so, check system call restarting.. */ 369 /* If so, check system call restarting.. */
399 switch (current_syscall_ret(regs)) { 370 switch (syscall_get_error(current, regs)) {
400 case -ERESTART_RESTARTBLOCK: 371 case -ERESTART_RESTARTBLOCK:
401 case -ERESTARTNOHAND: 372 case -ERESTARTNOHAND:
402 regs->ax = -EINTR; 373 regs->ax = -EINTR;
@@ -429,7 +400,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
429 ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); 400 ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
430 else 401 else
431 ret = ia32_setup_frame(sig, ka, oldset, regs); 402 ret = ia32_setup_frame(sig, ka, oldset, regs);
432 } else 403 } else
433#endif 404#endif
434 ret = setup_rt_frame(sig, ka, info, oldset, regs); 405 ret = setup_rt_frame(sig, ka, info, oldset, regs);
435 406
@@ -453,15 +424,16 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
453 * handler too. 424 * handler too.
454 */ 425 */
455 regs->flags &= ~X86_EFLAGS_TF; 426 regs->flags &= ~X86_EFLAGS_TF;
456 if (test_thread_flag(TIF_SINGLESTEP))
457 ptrace_notify(SIGTRAP);
458 427
459 spin_lock_irq(&current->sighand->siglock); 428 spin_lock_irq(&current->sighand->siglock);
460 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); 429 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
461 if (!(ka->sa.sa_flags & SA_NODEFER)) 430 if (!(ka->sa.sa_flags & SA_NODEFER))
462 sigaddset(&current->blocked,sig); 431 sigaddset(&current->blocked, sig);
463 recalc_sigpending(); 432 recalc_sigpending();
464 spin_unlock_irq(&current->sighand->siglock); 433 spin_unlock_irq(&current->sighand->siglock);
434
435 tracehook_signal_handler(sig, info, ka, regs,
436 test_thread_flag(TIF_SINGLESTEP));
465 } 437 }
466 438
467 return ret; 439 return ret;
@@ -518,9 +490,9 @@ static void do_signal(struct pt_regs *regs)
518 } 490 }
519 491
520 /* Did we come from a system call? */ 492 /* Did we come from a system call? */
521 if (current_syscall(regs) >= 0) { 493 if (syscall_get_nr(current, regs) >= 0) {
522 /* Restart the system call - no handlers present */ 494 /* Restart the system call - no handlers present */
523 switch (current_syscall_ret(regs)) { 495 switch (syscall_get_error(current, regs)) {
524 case -ERESTARTNOHAND: 496 case -ERESTARTNOHAND:
525 case -ERESTARTSYS: 497 case -ERESTARTSYS:
526 case -ERESTARTNOINTR: 498 case -ERESTARTNOINTR:
@@ -558,17 +530,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
558 /* deal with pending signal delivery */ 530 /* deal with pending signal delivery */
559 if (thread_info_flags & _TIF_SIGPENDING) 531 if (thread_info_flags & _TIF_SIGPENDING)
560 do_signal(regs); 532 do_signal(regs);
533
534 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
535 clear_thread_flag(TIF_NOTIFY_RESUME);
536 tracehook_notify_resume(regs);
537 }
561} 538}
562 539
563void signal_fault(struct pt_regs *regs, void __user *frame, char *where) 540void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
564{ 541{
565 struct task_struct *me = current; 542 struct task_struct *me = current;
566 if (show_unhandled_signals && printk_ratelimit()) { 543 if (show_unhandled_signals && printk_ratelimit()) {
567 printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", 544 printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
568 me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); 545 me->comm, me->pid, where, frame, regs->ip,
546 regs->sp, regs->orig_ax);
569 print_vma_addr(" in ", regs->ip); 547 print_vma_addr(" in ", regs->ip);
570 printk("\n"); 548 printk("\n");
571 } 549 }
572 550
573 force_sig(SIGSEGV, me); 551 force_sig(SIGSEGV, me);
574} 552}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f916..45531e3ba194 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
88#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) 88#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
89#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) 89#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
90#else 90#else
91struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; 91static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
92#define get_idle_for_cpu(x) (idle_thread_array[(x)]) 92#define get_idle_for_cpu(x) (idle_thread_array[(x)])
93#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) 93#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
94#endif 94#endif
@@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid;
129static cpumask_t cpu_sibling_setup_map; 129static cpumask_t cpu_sibling_setup_map;
130 130
131/* Set if we find a B stepping CPU */ 131/* Set if we find a B stepping CPU */
132int __cpuinitdata smp_b_stepping; 132static int __cpuinitdata smp_b_stepping;
133 133
134#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) 134#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
135 135
@@ -1313,16 +1313,13 @@ __init void prefill_possible_map(void)
1313 if (!num_processors) 1313 if (!num_processors)
1314 num_processors = 1; 1314 num_processors = 1;
1315 1315
1316#ifdef CONFIG_HOTPLUG_CPU
1317 if (additional_cpus == -1) { 1316 if (additional_cpus == -1) {
1318 if (disabled_cpus > 0) 1317 if (disabled_cpus > 0)
1319 additional_cpus = disabled_cpus; 1318 additional_cpus = disabled_cpus;
1320 else 1319 else
1321 additional_cpus = 0; 1320 additional_cpus = 0;
1322 } 1321 }
1323#else 1322
1324 additional_cpus = 0;
1325#endif
1326 possible = num_processors + additional_cpus; 1323 possible = num_processors + additional_cpus;
1327 if (possible > NR_CPUS) 1324 if (possible > NR_CPUS)
1328 possible = NR_CPUS; 1325 possible = NR_CPUS;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb855a60..1884a8d12bfa 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,6 +22,8 @@
22#include <linux/uaccess.h> 22#include <linux/uaccess.h>
23#include <linux/unistd.h> 23#include <linux/unistd.h>
24 24
25#include <asm/syscalls.h>
26
25asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, 27asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
26 unsigned long prot, unsigned long flags, 28 unsigned long prot, unsigned long flags,
27 unsigned long fd, unsigned long pgoff) 29 unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef33817..6bc211accf08 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -13,15 +13,17 @@
13#include <linux/utsname.h> 13#include <linux/utsname.h>
14#include <linux/personality.h> 14#include <linux/personality.h>
15#include <linux/random.h> 15#include <linux/random.h>
16#include <linux/uaccess.h>
16 17
17#include <asm/uaccess.h>
18#include <asm/ia32.h> 18#include <asm/ia32.h>
19#include <asm/syscalls.h>
19 20
20asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, 21asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
21 unsigned long fd, unsigned long off) 22 unsigned long prot, unsigned long flags,
23 unsigned long fd, unsigned long off)
22{ 24{
23 long error; 25 long error;
24 struct file * file; 26 struct file *file;
25 27
26 error = -EINVAL; 28 error = -EINVAL;
27 if (off & ~PAGE_MASK) 29 if (off & ~PAGE_MASK)
@@ -56,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
56 unmapped base down for this case. This can give 58 unmapped base down for this case. This can give
57 conflicts with the heap, but we assume that glibc 59 conflicts with the heap, but we assume that glibc
58 malloc knows how to fall back to mmap. Give it 1GB 60 malloc knows how to fall back to mmap. Give it 1GB
59 of playground for now. -AK */ 61 of playground for now. -AK */
60 *begin = 0x40000000; 62 *begin = 0x40000000;
61 *end = 0x80000000; 63 *end = 0x80000000;
62 if (current->flags & PF_RANDOMIZE) { 64 if (current->flags & PF_RANDOMIZE) {
63 new_begin = randomize_range(*begin, *begin + 0x02000000, 0); 65 new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
64 if (new_begin) 66 if (new_begin)
@@ -66,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
66 } 68 }
67 } else { 69 } else {
68 *begin = TASK_UNMAPPED_BASE; 70 *begin = TASK_UNMAPPED_BASE;
69 *end = TASK_SIZE; 71 *end = TASK_SIZE;
70 } 72 }
71} 73}
72 74
73unsigned long 75unsigned long
74arch_get_unmapped_area(struct file *filp, unsigned long addr, 76arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -78,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
78 struct vm_area_struct *vma; 80 struct vm_area_struct *vma;
79 unsigned long start_addr; 81 unsigned long start_addr;
80 unsigned long begin, end; 82 unsigned long begin, end;
81 83
82 if (flags & MAP_FIXED) 84 if (flags & MAP_FIXED)
83 return addr; 85 return addr;
84 86
85 find_start_end(flags, &begin, &end); 87 find_start_end(flags, &begin, &end);
86 88
87 if (len > end) 89 if (len > end)
88 return -ENOMEM; 90 return -ENOMEM;
@@ -96,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
96 } 98 }
97 if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) 99 if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
98 && len <= mm->cached_hole_size) { 100 && len <= mm->cached_hole_size) {
99 mm->cached_hole_size = 0; 101 mm->cached_hole_size = 0;
100 mm->free_area_cache = begin; 102 mm->free_area_cache = begin;
101 } 103 }
102 addr = mm->free_area_cache; 104 addr = mm->free_area_cache;
103 if (addr < begin) 105 if (addr < begin)
104 addr = begin; 106 addr = begin;
105 start_addr = addr; 107 start_addr = addr;
106 108
107full_search: 109full_search:
@@ -127,7 +129,7 @@ full_search:
127 return addr; 129 return addr;
128 } 130 }
129 if (addr + mm->cached_hole_size < vma->vm_start) 131 if (addr + mm->cached_hole_size < vma->vm_start)
130 mm->cached_hole_size = vma->vm_start - addr; 132 mm->cached_hole_size = vma->vm_start - addr;
131 133
132 addr = vma->vm_end; 134 addr = vma->vm_end;
133 } 135 }
@@ -177,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
177 vma = find_vma(mm, addr-len); 179 vma = find_vma(mm, addr-len);
178 if (!vma || addr <= vma->vm_start) 180 if (!vma || addr <= vma->vm_start)
179 /* remember the address as a hint for next time */ 181 /* remember the address as a hint for next time */
180 return (mm->free_area_cache = addr-len); 182 return mm->free_area_cache = addr-len;
181 } 183 }
182 184
183 if (mm->mmap_base < len) 185 if (mm->mmap_base < len)
@@ -194,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
194 vma = find_vma(mm, addr); 196 vma = find_vma(mm, addr);
195 if (!vma || addr+len <= vma->vm_start) 197 if (!vma || addr+len <= vma->vm_start)
196 /* remember the address as a hint for next time */ 198 /* remember the address as a hint for next time */
197 return (mm->free_area_cache = addr); 199 return mm->free_area_cache = addr;
198 200
199 /* remember the largest hole we saw so far */ 201 /* remember the largest hole we saw so far */
200 if (addr + mm->cached_hole_size < vma->vm_start) 202 if (addr + mm->cached_hole_size < vma->vm_start)
@@ -224,13 +226,13 @@ bottomup:
224} 226}
225 227
226 228
227asmlinkage long sys_uname(struct new_utsname __user * name) 229asmlinkage long sys_uname(struct new_utsname __user *name)
228{ 230{
229 int err; 231 int err;
230 down_read(&uts_sem); 232 down_read(&uts_sem);
231 err = copy_to_user(name, utsname(), sizeof (*name)); 233 err = copy_to_user(name, utsname(), sizeof(*name));
232 up_read(&uts_sem); 234 up_read(&uts_sem);
233 if (personality(current->personality) == PER_LINUX32) 235 if (personality(current->personality) == PER_LINUX32)
234 err |= copy_to_user(&name->machine, "i686", 5); 236 err |= copy_to_user(&name->machine, "i686", 5);
235 return err ? -EFAULT : 0; 237 return err ? -EFAULT : 0;
236} 238}
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c17487..3d1be4f0fac5 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
8#define __NO_STUBS 8#define __NO_STUBS
9 9
10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
11#undef _ASM_X86_64_UNISTD_H_ 11#undef ASM_X86__UNISTD_64_H
12#include <asm/unistd_64.h> 12#include <asm/unistd_64.h>
13 13
14#undef __SYSCALL 14#undef __SYSCALL
15#define __SYSCALL(nr, sym) [nr] = sym, 15#define __SYSCALL(nr, sym) [nr] = sym,
16#undef _ASM_X86_64_UNISTD_H_ 16#undef ASM_X86__UNISTD_64_H
17 17
18typedef void (*sys_call_ptr_t)(void); 18typedef void (*sys_call_ptr_t)(void);
19 19
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c664afc0..bbecf8b6bf96 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -36,6 +36,7 @@
36#include <asm/arch_hooks.h> 36#include <asm/arch_hooks.h>
37#include <asm/hpet.h> 37#include <asm/hpet.h>
38#include <asm/time.h> 38#include <asm/time.h>
39#include <asm/timer.h>
39 40
40#include "do_timer.h" 41#include "do_timer.h"
41 42
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf375a307..6bb7b8579e70 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -10,6 +10,7 @@
10#include <asm/ldt.h> 10#include <asm/ldt.h>
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/syscalls.h>
13 14
14#include "tls.h" 15#include "tls.h"
15 16
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caaca7115..7a31f104bef9 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -32,6 +32,8 @@
32#include <linux/bug.h> 32#include <linux/bug.h>
33#include <linux/nmi.h> 33#include <linux/nmi.h>
34#include <linux/mm.h> 34#include <linux/mm.h>
35#include <linux/smp.h>
36#include <linux/io.h>
35 37
36#if defined(CONFIG_EDAC) 38#if defined(CONFIG_EDAC)
37#include <linux/edac.h> 39#include <linux/edac.h>
@@ -45,9 +47,6 @@
45#include <asm/unwind.h> 47#include <asm/unwind.h>
46#include <asm/desc.h> 48#include <asm/desc.h>
47#include <asm/i387.h> 49#include <asm/i387.h>
48#include <asm/nmi.h>
49#include <asm/smp.h>
50#include <asm/io.h>
51#include <asm/pgalloc.h> 50#include <asm/pgalloc.h>
52#include <asm/proto.h> 51#include <asm/proto.h>
53#include <asm/pda.h> 52#include <asm/pda.h>
@@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
85 84
86void printk_address(unsigned long address, int reliable) 85void printk_address(unsigned long address, int reliable)
87{ 86{
88 printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); 87 printk(" [<%016lx>] %s%pS\n",
88 address, reliable ? "" : "? ", (void *) address);
89} 89}
90 90
91static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 91static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
98 [STACKFAULT_STACK - 1] = "#SS", 98 [STACKFAULT_STACK - 1] = "#SS",
99 [MCE_STACK - 1] = "#MC", 99 [MCE_STACK - 1] = "#MC",
100#if DEBUG_STKSZ > EXCEPTION_STKSZ 100#if DEBUG_STKSZ > EXCEPTION_STKSZ
101 [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" 101 [N_EXCEPTION_STACKS ...
102 N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
102#endif 103#endif
103 }; 104 };
104 unsigned k; 105 unsigned k;
@@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
163} 164}
164 165
165/* 166/*
166 * x86-64 can have up to three kernel stacks: 167 * x86-64 can have up to three kernel stacks:
167 * process stack 168 * process stack
168 * interrupt stack 169 * interrupt stack
169 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 170 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
219 const struct stacktrace_ops *ops, void *data) 220 const struct stacktrace_ops *ops, void *data)
220{ 221{
221 const unsigned cpu = get_cpu(); 222 const unsigned cpu = get_cpu();
222 unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; 223 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
223 unsigned used = 0; 224 unsigned used = 0;
224 struct thread_info *tinfo; 225 struct thread_info *tinfo;
225 226
@@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
237 if (!bp) { 238 if (!bp) {
238 if (task == current) { 239 if (task == current) {
239 /* Grab bp right from our regs */ 240 /* Grab bp right from our regs */
240 asm("movq %%rbp, %0" : "=r" (bp) :); 241 asm("movq %%rbp, %0" : "=r" (bp) : );
241 } else { 242 } else {
242 /* bp is the last reg pushed by switch_to */ 243 /* bp is the last reg pushed by switch_to */
243 bp = *(unsigned long *) task->thread.sp; 244 bp = *(unsigned long *) task->thread.sp;
@@ -339,9 +340,8 @@ static void
339show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 340show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
340 unsigned long *stack, unsigned long bp, char *log_lvl) 341 unsigned long *stack, unsigned long bp, char *log_lvl)
341{ 342{
342 printk("\nCall Trace:\n"); 343 printk("Call Trace:\n");
343 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 344 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
344 printk("\n");
345} 345}
346 346
347void show_trace(struct task_struct *task, struct pt_regs *regs, 347void show_trace(struct task_struct *task, struct pt_regs *regs,
@@ -357,11 +357,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
357 unsigned long *stack; 357 unsigned long *stack;
358 int i; 358 int i;
359 const int cpu = smp_processor_id(); 359 const int cpu = smp_processor_id();
360 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); 360 unsigned long *irqstack_end =
361 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 361 (unsigned long *) (cpu_pda(cpu)->irqstackptr);
362 unsigned long *irqstack =
363 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
362 364
363 // debugging aid: "show_stack(NULL, NULL);" prints the 365 /*
364 // back trace for this cpu. 366 * debugging aid: "show_stack(NULL, NULL);" prints the
367 * back trace for this cpu.
368 */
365 369
366 if (sp == NULL) { 370 if (sp == NULL) {
367 if (task) 371 if (task)
@@ -386,6 +390,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
386 printk(" %016lx", *stack++); 390 printk(" %016lx", *stack++);
387 touch_nmi_watchdog(); 391 touch_nmi_watchdog();
388 } 392 }
393 printk("\n");
389 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 394 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
390} 395}
391 396
@@ -404,7 +409,7 @@ void dump_stack(void)
404 409
405#ifdef CONFIG_FRAME_POINTER 410#ifdef CONFIG_FRAME_POINTER
406 if (!bp) 411 if (!bp)
407 asm("movq %%rbp, %0" : "=r" (bp):); 412 asm("movq %%rbp, %0" : "=r" (bp) : );
408#endif 413#endif
409 414
410 printk("Pid: %d, comm: %.20s %s %s %.*s\n", 415 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -414,7 +419,6 @@ void dump_stack(void)
414 init_utsname()->version); 419 init_utsname()->version);
415 show_trace(NULL, NULL, &stack, bp); 420 show_trace(NULL, NULL, &stack, bp);
416} 421}
417
418EXPORT_SYMBOL(dump_stack); 422EXPORT_SYMBOL(dump_stack);
419 423
420void show_registers(struct pt_regs *regs) 424void show_registers(struct pt_regs *regs)
@@ -443,7 +447,6 @@ void show_registers(struct pt_regs *regs)
443 printk("Stack: "); 447 printk("Stack: ");
444 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 448 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
445 regs->bp, ""); 449 regs->bp, "");
446 printk("\n");
447 450
448 printk(KERN_EMERG "Code: "); 451 printk(KERN_EMERG "Code: ");
449 452
@@ -493,7 +496,7 @@ unsigned __kprobes long oops_begin(void)
493 raw_local_irq_save(flags); 496 raw_local_irq_save(flags);
494 cpu = smp_processor_id(); 497 cpu = smp_processor_id();
495 if (!__raw_spin_trylock(&die_lock)) { 498 if (!__raw_spin_trylock(&die_lock)) {
496 if (cpu == die_owner) 499 if (cpu == die_owner)
497 /* nested oops. should stop eventually */; 500 /* nested oops. should stop eventually */;
498 else 501 else
499 __raw_spin_lock(&die_lock); 502 __raw_spin_lock(&die_lock);
@@ -638,7 +641,7 @@ kernel_trap:
638} 641}
639 642
640#define DO_ERROR(trapnr, signr, str, name) \ 643#define DO_ERROR(trapnr, signr, str, name) \
641asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ 644asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
642{ \ 645{ \
643 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 646 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
644 == NOTIFY_STOP) \ 647 == NOTIFY_STOP) \
@@ -648,7 +651,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
648} 651}
649 652
650#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 653#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
651asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ 654asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
652{ \ 655{ \
653 siginfo_t info; \ 656 siginfo_t info; \
654 info.si_signo = signr; \ 657 info.si_signo = signr; \
@@ -683,7 +686,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
683 preempt_conditional_cli(regs); 686 preempt_conditional_cli(regs);
684} 687}
685 688
686asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) 689asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
687{ 690{
688 static const char str[] = "double fault"; 691 static const char str[] = "double fault";
689 struct task_struct *tsk = current; 692 struct task_struct *tsk = current;
@@ -778,9 +781,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
778} 781}
779 782
780static notrace __kprobes void 783static notrace __kprobes void
781unknown_nmi_error(unsigned char reason, struct pt_regs * regs) 784unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
782{ 785{
783 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 786 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
787 NOTIFY_STOP)
784 return; 788 return;
785 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 789 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
786 reason); 790 reason);
@@ -882,7 +886,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
882 else if (user_mode(eregs)) 886 else if (user_mode(eregs))
883 regs = task_pt_regs(current); 887 regs = task_pt_regs(current);
884 /* Exception from kernel and interrupts are enabled. Move to 888 /* Exception from kernel and interrupts are enabled. Move to
885 kernel process stack. */ 889 kernel process stack. */
886 else if (eregs->flags & X86_EFLAGS_IF) 890 else if (eregs->flags & X86_EFLAGS_IF)
887 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); 891 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
888 if (eregs != regs) 892 if (eregs != regs)
@@ -891,7 +895,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
891} 895}
892 896
893/* runs on IST stack. */ 897/* runs on IST stack. */
894asmlinkage void __kprobes do_debug(struct pt_regs * regs, 898asmlinkage void __kprobes do_debug(struct pt_regs *regs,
895 unsigned long error_code) 899 unsigned long error_code)
896{ 900{
897 struct task_struct *tsk = current; 901 struct task_struct *tsk = current;
@@ -1035,7 +1039,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
1035 1039
1036asmlinkage void bad_intr(void) 1040asmlinkage void bad_intr(void)
1037{ 1041{
1038 printk("bad interrupt"); 1042 printk("bad interrupt");
1039} 1043}
1040 1044
1041asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) 1045asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1047,7 +1051,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
1047 1051
1048 conditional_sti(regs); 1052 conditional_sti(regs);
1049 if (!user_mode(regs) && 1053 if (!user_mode(regs) &&
1050 kernel_math_error(regs, "kernel simd math error", 19)) 1054 kernel_math_error(regs, "kernel simd math error", 19))
1051 return; 1055 return;
1052 1056
1053 /* 1057 /*
@@ -1092,7 +1096,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
1092 force_sig_info(SIGFPE, &info, task); 1096 force_sig_info(SIGFPE, &info, task);
1093} 1097}
1094 1098
1095asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) 1099asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
1096{ 1100{
1097} 1101}
1098 1102
@@ -1149,8 +1153,10 @@ void __init trap_init(void)
1149 set_intr_gate(0, &divide_error); 1153 set_intr_gate(0, &divide_error);
1150 set_intr_gate_ist(1, &debug, DEBUG_STACK); 1154 set_intr_gate_ist(1, &debug, DEBUG_STACK);
1151 set_intr_gate_ist(2, &nmi, NMI_STACK); 1155 set_intr_gate_ist(2, &nmi, NMI_STACK);
1152 set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ 1156 /* int3 can be called from all */
1153 set_system_gate(4, &overflow); /* int4 can be called from all */ 1157 set_system_gate_ist(3, &int3, DEBUG_STACK);
1158 /* int4 can be called from all */
1159 set_system_gate(4, &overflow);
1154 set_intr_gate(5, &bounds); 1160 set_intr_gate(5, &bounds);
1155 set_intr_gate(6, &invalid_op); 1161 set_intr_gate(6, &invalid_op);
1156 set_intr_gate(7, &device_not_available); 1162 set_intr_gate(7, &device_not_available);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8f98e9de1b82..161bb850fc47 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
104/* 104/*
105 * Read TSC and the reference counters. Take care of SMI disturbance 105 * Read TSC and the reference counters. Take care of SMI disturbance
106 */ 106 */
107static u64 tsc_read_refs(u64 *pm, u64 *hpet) 107static u64 tsc_read_refs(u64 *p, int hpet)
108{ 108{
109 u64 t1, t2; 109 u64 t1, t2;
110 int i; 110 int i;
@@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet)
112 for (i = 0; i < MAX_RETRIES; i++) { 112 for (i = 0; i < MAX_RETRIES; i++) {
113 t1 = get_cycles(); 113 t1 = get_cycles();
114 if (hpet) 114 if (hpet)
115 *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; 115 *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
116 else 116 else
117 *pm = acpi_pm_read_early(); 117 *p = acpi_pm_read_early();
118 t2 = get_cycles(); 118 t2 = get_cycles();
119 if ((t2 - t1) < SMI_TRESHOLD) 119 if ((t2 - t1) < SMI_TRESHOLD)
120 return t2; 120 return t2;
@@ -123,13 +123,59 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet)
123} 123}
124 124
125/* 125/*
126 * Calculate the TSC frequency from HPET reference
127 */
128static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
129{
130 u64 tmp;
131
132 if (hpet2 < hpet1)
133 hpet2 += 0x100000000ULL;
134 hpet2 -= hpet1;
135 tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
136 do_div(tmp, 1000000);
137 do_div(deltatsc, tmp);
138
139 return (unsigned long) deltatsc;
140}
141
142/*
143 * Calculate the TSC frequency from PMTimer reference
144 */
145static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
146{
147 u64 tmp;
148
149 if (!pm1 && !pm2)
150 return ULONG_MAX;
151
152 if (pm2 < pm1)
153 pm2 += (u64)ACPI_PM_OVRRUN;
154 pm2 -= pm1;
155 tmp = pm2 * 1000000000LL;
156 do_div(tmp, PMTMR_TICKS_PER_SEC);
157 do_div(deltatsc, tmp);
158
159 return (unsigned long) deltatsc;
160}
161
162#define CAL_MS 10
163#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS))
164#define CAL_PIT_LOOPS 1000
165
166#define CAL2_MS 50
167#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS))
168#define CAL2_PIT_LOOPS 5000
169
170
171/*
126 * Try to calibrate the TSC against the Programmable 172 * Try to calibrate the TSC against the Programmable
127 * Interrupt Timer and return the frequency of the TSC 173 * Interrupt Timer and return the frequency of the TSC
128 * in kHz. 174 * in kHz.
129 * 175 *
130 * Return ULONG_MAX on failure to calibrate. 176 * Return ULONG_MAX on failure to calibrate.
131 */ 177 */
132static unsigned long pit_calibrate_tsc(void) 178static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
133{ 179{
134 u64 tsc, t1, t2, delta; 180 u64 tsc, t1, t2, delta;
135 unsigned long tscmin, tscmax; 181 unsigned long tscmin, tscmax;
@@ -144,8 +190,8 @@ static unsigned long pit_calibrate_tsc(void)
144 * (LSB then MSB) to begin countdown. 190 * (LSB then MSB) to begin countdown.
145 */ 191 */
146 outb(0xb0, 0x43); 192 outb(0xb0, 0x43);
147 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); 193 outb(latch & 0xff, 0x42);
148 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); 194 outb(latch >> 8, 0x42);
149 195
150 tsc = t1 = t2 = get_cycles(); 196 tsc = t1 = t2 = get_cycles();
151 197
@@ -166,31 +212,154 @@ static unsigned long pit_calibrate_tsc(void)
166 /* 212 /*
167 * Sanity checks: 213 * Sanity checks:
168 * 214 *
169 * If we were not able to read the PIT more than 5000 215 * If we were not able to read the PIT more than loopmin
170 * times, then we have been hit by a massive SMI 216 * times, then we have been hit by a massive SMI
171 * 217 *
172 * If the maximum is 10 times larger than the minimum, 218 * If the maximum is 10 times larger than the minimum,
173 * then we got hit by an SMI as well. 219 * then we got hit by an SMI as well.
174 */ 220 */
175 if (pitcnt < 5000 || tscmax > 10 * tscmin) 221 if (pitcnt < loopmin || tscmax > 10 * tscmin)
176 return ULONG_MAX; 222 return ULONG_MAX;
177 223
178 /* Calculate the PIT value */ 224 /* Calculate the PIT value */
179 delta = t2 - t1; 225 delta = t2 - t1;
180 do_div(delta, 50); 226 do_div(delta, ms);
181 return delta; 227 return delta;
182} 228}
183 229
230/*
231 * This reads the current MSB of the PIT counter, and
232 * checks if we are running on sufficiently fast and
233 * non-virtualized hardware.
234 *
235 * Our expectations are:
236 *
237 * - the PIT is running at roughly 1.19MHz
238 *
239 * - each IO is going to take about 1us on real hardware,
240 * but we allow it to be much faster (by a factor of 10) or
241 * _slightly_ slower (ie we allow up to a 2us read+counter
242 * update - anything else implies a unacceptably slow CPU
243 * or PIT for the fast calibration to work.
244 *
245 * - with 256 PIT ticks to read the value, we have 214us to
246 * see the same MSB (and overhead like doing a single TSC
247 * read per MSB value etc).
248 *
249 * - We're doing 2 reads per loop (LSB, MSB), and we expect
250 * them each to take about a microsecond on real hardware.
251 * So we expect a count value of around 100. But we'll be
252 * generous, and accept anything over 50.
253 *
254 * - if the PIT is stuck, and we see *many* more reads, we
255 * return early (and the next caller of pit_expect_msb()
256 * then consider it a failure when they don't see the
257 * next expected value).
258 *
259 * These expectations mean that we know that we have seen the
260 * transition from one expected value to another with a fairly
261 * high accuracy, and we didn't miss any events. We can thus
262 * use the TSC value at the transitions to calculate a pretty
263 * good value for the TSC frequencty.
264 */
265static inline int pit_expect_msb(unsigned char val)
266{
267 int count = 0;
268
269 for (count = 0; count < 50000; count++) {
270 /* Ignore LSB */
271 inb(0x42);
272 if (inb(0x42) != val)
273 break;
274 }
275 return count > 50;
276}
277
278/*
279 * How many MSB values do we want to see? We aim for a
280 * 15ms calibration, which assuming a 2us counter read
281 * error should give us roughly 150 ppm precision for
282 * the calibration.
283 */
284#define QUICK_PIT_MS 15
285#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
286
287static unsigned long quick_pit_calibrate(void)
288{
289 /* Set the Gate high, disable speaker */
290 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
291
292 /*
293 * Counter 2, mode 0 (one-shot), binary count
294 *
295 * NOTE! Mode 2 decrements by two (and then the
296 * output is flipped each time, giving the same
297 * final output frequency as a decrement-by-one),
298 * so mode 0 is much better when looking at the
299 * individual counts.
300 */
301 outb(0xb0, 0x43);
302
303 /* Start at 0xffff */
304 outb(0xff, 0x42);
305 outb(0xff, 0x42);
306
307 if (pit_expect_msb(0xff)) {
308 int i;
309 u64 t1, t2, delta;
310 unsigned char expect = 0xfe;
311
312 t1 = get_cycles();
313 for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
314 if (!pit_expect_msb(expect))
315 goto failed;
316 }
317 t2 = get_cycles();
318
319 /*
320 * Make sure we can rely on the second TSC timestamp:
321 */
322 if (!pit_expect_msb(expect))
323 goto failed;
324
325 /*
326 * Ok, if we get here, then we've seen the
327 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
328 * times, and each MSB had many hits, so we never
329 * had any sudden jumps.
330 *
331 * As a result, we can depend on there not being
332 * any odd delays anywhere, and the TSC reads are
333 * reliable.
334 *
335 * kHz = ticks / time-in-seconds / 1000;
336 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
337 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
338 */
339 delta = (t2 - t1)*PIT_TICK_RATE;
340 do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
341 printk("Fast TSC calibration using PIT\n");
342 return delta;
343 }
344failed:
345 return 0;
346}
184 347
185/** 348/**
186 * native_calibrate_tsc - calibrate the tsc on boot 349 * native_calibrate_tsc - calibrate the tsc on boot
187 */ 350 */
188unsigned long native_calibrate_tsc(void) 351unsigned long native_calibrate_tsc(void)
189{ 352{
190 u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; 353 u64 tsc1, tsc2, delta, ref1, ref2;
191 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 354 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
192 unsigned long flags; 355 unsigned long flags, latch, ms, fast_calibrate;
193 int hpet = is_hpet_enabled(), i; 356 int hpet = is_hpet_enabled(), i, loopmin;
357
358 local_irq_save(flags);
359 fast_calibrate = quick_pit_calibrate();
360 local_irq_restore(flags);
361 if (fast_calibrate)
362 return fast_calibrate;
194 363
195 /* 364 /*
196 * Run 5 calibration loops to get the lowest frequency value 365 * Run 5 calibration loops to get the lowest frequency value
@@ -216,7 +385,13 @@ unsigned long native_calibrate_tsc(void)
216 * calibration delay loop as we have to wait for a certain 385 * calibration delay loop as we have to wait for a certain
217 * amount of time anyway. 386 * amount of time anyway.
218 */ 387 */
219 for (i = 0; i < 5; i++) { 388
389 /* Preset PIT loop values */
390 latch = CAL_LATCH;
391 ms = CAL_MS;
392 loopmin = CAL_PIT_LOOPS;
393
394 for (i = 0; i < 3; i++) {
220 unsigned long tsc_pit_khz; 395 unsigned long tsc_pit_khz;
221 396
222 /* 397 /*
@@ -226,16 +401,16 @@ unsigned long native_calibrate_tsc(void)
226 * read the end value. 401 * read the end value.
227 */ 402 */
228 local_irq_save(flags); 403 local_irq_save(flags);
229 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); 404 tsc1 = tsc_read_refs(&ref1, hpet);
230 tsc_pit_khz = pit_calibrate_tsc(); 405 tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
231 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); 406 tsc2 = tsc_read_refs(&ref2, hpet);
232 local_irq_restore(flags); 407 local_irq_restore(flags);
233 408
234 /* Pick the lowest PIT TSC calibration so far */ 409 /* Pick the lowest PIT TSC calibration so far */
235 tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); 410 tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
236 411
237 /* hpet or pmtimer available ? */ 412 /* hpet or pmtimer available ? */
238 if (!hpet && !pm1 && !pm2) 413 if (!hpet && !ref1 && !ref2)
239 continue; 414 continue;
240 415
241 /* Check, whether the sampling was disturbed by an SMI */ 416 /* Check, whether the sampling was disturbed by an SMI */
@@ -243,23 +418,41 @@ unsigned long native_calibrate_tsc(void)
243 continue; 418 continue;
244 419
245 tsc2 = (tsc2 - tsc1) * 1000000LL; 420 tsc2 = (tsc2 - tsc1) * 1000000LL;
421 if (hpet)
422 tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
423 else
424 tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
246 425
247 if (hpet) { 426 tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
248 if (hpet2 < hpet1) 427
249 hpet2 += 0x100000000ULL; 428 /* Check the reference deviation */
250 hpet2 -= hpet1; 429 delta = ((u64) tsc_pit_min) * 100;
251 tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); 430 do_div(delta, tsc_ref_min);
252 do_div(tsc1, 1000000); 431
253 } else { 432 /*
254 if (pm2 < pm1) 433 * If both calibration results are inside a 10% window
255 pm2 += (u64)ACPI_PM_OVRRUN; 434 * then we can be sure, that the calibration
256 pm2 -= pm1; 435 * succeeded. We break out of the loop right away. We
257 tsc1 = pm2 * 1000000000LL; 436 * use the reference value, as it is more precise.
258 do_div(tsc1, PMTMR_TICKS_PER_SEC); 437 */
438 if (delta >= 90 && delta <= 110) {
439 printk(KERN_INFO
440 "TSC: PIT calibration matches %s. %d loops\n",
441 hpet ? "HPET" : "PMTIMER", i + 1);
442 return tsc_ref_min;
259 } 443 }
260 444
261 do_div(tsc2, tsc1); 445 /*
262 tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); 446 * Check whether PIT failed more than once. This
447 * happens in virtualized environments. We need to
448 * give the virtual PC a slightly longer timeframe for
449 * the HPET/PMTIMER to make the result precise.
450 */
451 if (i == 1 && tsc_pit_min == ULONG_MAX) {
452 latch = CAL2_LATCH;
453 ms = CAL2_MS;
454 loopmin = CAL2_PIT_LOOPS;
455 }
263 } 456 }
264 457
265 /* 458 /*
@@ -270,7 +463,7 @@ unsigned long native_calibrate_tsc(void)
270 printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); 463 printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
271 464
272 /* We don't have an alternative source, disable TSC */ 465 /* We don't have an alternative source, disable TSC */
273 if (!hpet && !pm1 && !pm2) { 466 if (!hpet && !ref1 && !ref2) {
274 printk("TSC: No reference (HPET/PMTIMER) available\n"); 467 printk("TSC: No reference (HPET/PMTIMER) available\n");
275 return 0; 468 return 0;
276 } 469 }
@@ -278,7 +471,7 @@ unsigned long native_calibrate_tsc(void)
278 /* The alternative source failed as well, disable TSC */ 471 /* The alternative source failed as well, disable TSC */
279 if (tsc_ref_min == ULONG_MAX) { 472 if (tsc_ref_min == ULONG_MAX) {
280 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " 473 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
281 "failed due to SMI disturbance.\n"); 474 "failed.\n");
282 return 0; 475 return 0;
283 } 476 }
284 477
@@ -290,44 +483,25 @@ unsigned long native_calibrate_tsc(void)
290 } 483 }
291 484
292 /* We don't have an alternative source, use the PIT calibration value */ 485 /* We don't have an alternative source, use the PIT calibration value */
293 if (!hpet && !pm1 && !pm2) { 486 if (!hpet && !ref1 && !ref2) {
294 printk(KERN_INFO "TSC: Using PIT calibration value\n"); 487 printk(KERN_INFO "TSC: Using PIT calibration value\n");
295 return tsc_pit_min; 488 return tsc_pit_min;
296 } 489 }
297 490
298 /* The alternative source failed, use the PIT calibration value */ 491 /* The alternative source failed, use the PIT calibration value */
299 if (tsc_ref_min == ULONG_MAX) { 492 if (tsc_ref_min == ULONG_MAX) {
300 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " 493 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
301 "to SMI disturbance. Using PIT calibration\n"); 494 "Using PIT calibration\n");
302 return tsc_pit_min; 495 return tsc_pit_min;
303 } 496 }
304 497
305 /* Check the reference deviation */
306 delta = ((u64) tsc_pit_min) * 100;
307 do_div(delta, tsc_ref_min);
308
309 /*
310 * If both calibration results are inside a 5% window, the we
311 * use the lower frequency of those as it is probably the
312 * closest estimate.
313 */
314 if (delta >= 95 && delta <= 105) {
315 printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
316 hpet ? "HPET" : "PMTIMER");
317 printk(KERN_INFO "TSC: using %s calibration value\n",
318 tsc_pit_min <= tsc_ref_min ? "PIT" :
319 hpet ? "HPET" : "PMTIMER");
320 return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
321 }
322
323 printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
324 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
325
326 /* 498 /*
327 * The calibration values differ too much. In doubt, we use 499 * The calibration values differ too much. In doubt, we use
328 * the PIT value as we know that there are PMTIMERs around 500 * the PIT value as we know that there are PMTIMERs around
329 * running at double speed. 501 * running at double speed. At least we let the user know:
330 */ 502 */
503 printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
504 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
331 printk(KERN_INFO "TSC: Using PIT calibration value\n"); 505 printk(KERN_INFO "TSC: Using PIT calibration value\n");
332 return tsc_pit_min; 506 return tsc_pit_min;
333} 507}
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 594ef47f0a63..61a97e616f70 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -25,45 +25,31 @@
25#include <asm/visws/cobalt.h> 25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h> 26#include <asm/visws/piix4.h>
27#include <asm/arch_hooks.h> 27#include <asm/arch_hooks.h>
28#include <asm/io_apic.h>
28#include <asm/fixmap.h> 29#include <asm/fixmap.h>
29#include <asm/reboot.h> 30#include <asm/reboot.h>
30#include <asm/setup.h> 31#include <asm/setup.h>
31#include <asm/e820.h> 32#include <asm/e820.h>
32#include <asm/smp.h>
33#include <asm/io.h> 33#include <asm/io.h>
34 34
35#include <mach_ipi.h> 35#include <mach_ipi.h>
36 36
37#include "mach_apic.h" 37#include "mach_apic.h"
38 38
39#include <linux/init.h>
40#include <linux/smp.h>
41
42#include <linux/kernel_stat.h> 39#include <linux/kernel_stat.h>
43#include <linux/interrupt.h>
44#include <linux/init.h>
45 40
46#include <asm/io.h>
47#include <asm/apic.h>
48#include <asm/i8259.h> 41#include <asm/i8259.h>
49#include <asm/irq_vectors.h> 42#include <asm/irq_vectors.h>
50#include <asm/visws/cobalt.h>
51#include <asm/visws/lithium.h> 43#include <asm/visws/lithium.h>
52#include <asm/visws/piix4.h>
53 44
54#include <linux/sched.h> 45#include <linux/sched.h>
55#include <linux/kernel.h> 46#include <linux/kernel.h>
56#include <linux/init.h>
57#include <linux/pci.h> 47#include <linux/pci.h>
58#include <linux/pci_ids.h> 48#include <linux/pci_ids.h>
59 49
60extern int no_broadcast; 50extern int no_broadcast;
61 51
62#include <asm/io.h>
63#include <asm/apic.h> 52#include <asm/apic.h>
64#include <asm/arch_hooks.h>
65#include <asm/visws/cobalt.h>
66#include <asm/visws/lithium.h>
67 53
68char visws_board_type = -1; 54char visws_board_type = -1;
69char visws_board_rev = -1; 55char visws_board_rev = -1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566fa27d2..4eeb5cf9720d 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -46,6 +46,7 @@
46#include <asm/io.h> 46#include <asm/io.h>
47#include <asm/tlbflush.h> 47#include <asm/tlbflush.h>
48#include <asm/irq.h> 48#include <asm/irq.h>
49#include <asm/syscalls.h>
49 50
50/* 51/*
51 * Known problems: 52 * Known problems:
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db54..8c9ad02af5a2 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
235 const void *desc) 235 const void *desc)
236{ 236{
237 u32 *ldt_entry = (u32 *)desc; 237 u32 *ldt_entry = (u32 *)desc;
238 vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); 238 vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
239} 239}
240 240
241static void vmi_load_sp0(struct tss_struct *tss, 241static void vmi_load_sp0(struct tss_struct *tss,
@@ -393,13 +393,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
393} 393}
394#endif 394#endif
395 395
396static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) 396static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
397{ 397{
398 vmi_set_page_type(pfn, VMI_PAGE_L1); 398 vmi_set_page_type(pfn, VMI_PAGE_L1);
399 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 399 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
400} 400}
401 401
402static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) 402static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
403{ 403{
404 /* 404 /*
405 * This call comes in very early, before mem_map is setup. 405 * This call comes in very early, before mem_map is setup.
@@ -410,20 +410,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
410 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); 410 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
411} 411}
412 412
413static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) 413static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
414{ 414{
415 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); 415 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
416 vmi_check_page_type(clonepfn, VMI_PAGE_L2); 416 vmi_check_page_type(clonepfn, VMI_PAGE_L2);
417 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); 417 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
418} 418}
419 419
420static void vmi_release_pte(u32 pfn) 420static void vmi_release_pte(unsigned long pfn)
421{ 421{
422 vmi_ops.release_page(pfn, VMI_PAGE_L1); 422 vmi_ops.release_page(pfn, VMI_PAGE_L1);
423 vmi_set_page_type(pfn, VMI_PAGE_NORMAL); 423 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
424} 424}
425 425
426static void vmi_release_pmd(u32 pfn) 426static void vmi_release_pmd(unsigned long pfn)
427{ 427{
428 vmi_ops.release_page(pfn, VMI_PAGE_L2); 428 vmi_ops.release_page(pfn, VMI_PAGE_L2);
429 vmi_set_page_type(pfn, VMI_PAGE_NORMAL); 429 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 0c029e8959c7..7766d36983fc 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -61,7 +61,7 @@ static void vsmp_irq_enable(void)
61 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); 61 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
62} 62}
63 63
64static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, 64static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
65 unsigned long addr, unsigned len) 65 unsigned long addr, unsigned len)
66{ 66{
67 switch (type) { 67 switch (type) {
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 01b868ba82f8..321cf720dbb6 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -16,37 +16,46 @@ static void __rdmsr_on_cpu(void *info)
16 rdmsr(rv->msr_no, rv->l, rv->h); 16 rdmsr(rv->msr_no, rv->l, rv->h);
17} 17}
18 18
19static void __rdmsr_safe_on_cpu(void *info) 19static void __wrmsr_on_cpu(void *info)
20{ 20{
21 struct msr_info *rv = info; 21 struct msr_info *rv = info;
22 22
23 rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); 23 wrmsr(rv->msr_no, rv->l, rv->h);
24} 24}
25 25
26static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) 26int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
27{ 27{
28 int err = 0; 28 int err;
29 struct msr_info rv; 29 struct msr_info rv;
30 30
31 rv.msr_no = msr_no; 31 rv.msr_no = msr_no;
32 if (safe) { 32 err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
33 err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu,
34 &rv, 1);
35 err = err ? err : rv.err;
36 } else {
37 err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
38 }
39 *l = rv.l; 33 *l = rv.l;
40 *h = rv.h; 34 *h = rv.h;
41 35
42 return err; 36 return err;
43} 37}
44 38
45static void __wrmsr_on_cpu(void *info) 39int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
40{
41 int err;
42 struct msr_info rv;
43
44 rv.msr_no = msr_no;
45 rv.l = l;
46 rv.h = h;
47 err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
48
49 return err;
50}
51
52/* These "safe" variants are slower and should be used when the target MSR
53 may not actually exist. */
54static void __rdmsr_safe_on_cpu(void *info)
46{ 55{
47 struct msr_info *rv = info; 56 struct msr_info *rv = info;
48 57
49 wrmsr(rv->msr_no, rv->l, rv->h); 58 rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
50} 59}
51 60
52static void __wrmsr_safe_on_cpu(void *info) 61static void __wrmsr_safe_on_cpu(void *info)
@@ -56,45 +65,30 @@ static void __wrmsr_safe_on_cpu(void *info)
56 rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h); 65 rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
57} 66}
58 67
59static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) 68int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
60{ 69{
61 int err = 0; 70 int err;
62 struct msr_info rv; 71 struct msr_info rv;
63 72
64 rv.msr_no = msr_no; 73 rv.msr_no = msr_no;
65 rv.l = l; 74 err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
66 rv.h = h; 75 *l = rv.l;
67 if (safe) { 76 *h = rv.h;
68 err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu,
69 &rv, 1);
70 err = err ? err : rv.err;
71 } else {
72 err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
73 }
74
75 return err;
76}
77 77
78int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) 78 return err ? err : rv.err;
79{
80 return _wrmsr_on_cpu(cpu, msr_no, l, h, 0);
81} 79}
82 80
83int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
84{
85 return _rdmsr_on_cpu(cpu, msr_no, l, h, 0);
86}
87
88/* These "safe" variants are slower and should be used when the target MSR
89 may not actually exist. */
90int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) 81int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
91{ 82{
92 return _wrmsr_on_cpu(cpu, msr_no, l, h, 1); 83 int err;
93} 84 struct msr_info rv;
94 85
95int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) 86 rv.msr_no = msr_no;
96{ 87 rv.l = l;
97 return _rdmsr_on_cpu(cpu, msr_no, l, h, 1); 88 rv.h = h;
89 err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
90
91 return err ? err : rv.err;
98} 92}
99 93
100EXPORT_SYMBOL(rdmsr_on_cpu); 94EXPORT_SYMBOL(rdmsr_on_cpu);
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 94972e7c094d..82004d2bf05e 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -22,7 +22,7 @@ char *strcpy(char *dest, const char *src)
22 "testb %%al,%%al\n\t" 22 "testb %%al,%%al\n\t"
23 "jne 1b" 23 "jne 1b"
24 : "=&S" (d0), "=&D" (d1), "=&a" (d2) 24 : "=&S" (d0), "=&D" (d1), "=&a" (d2)
25 :"0" (src), "1" (dest) : "memory"); 25 : "0" (src), "1" (dest) : "memory");
26 return dest; 26 return dest;
27} 27}
28EXPORT_SYMBOL(strcpy); 28EXPORT_SYMBOL(strcpy);
@@ -42,7 +42,7 @@ char *strncpy(char *dest, const char *src, size_t count)
42 "stosb\n" 42 "stosb\n"
43 "2:" 43 "2:"
44 : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) 44 : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
45 :"0" (src), "1" (dest), "2" (count) : "memory"); 45 : "0" (src), "1" (dest), "2" (count) : "memory");
46 return dest; 46 return dest;
47} 47}
48EXPORT_SYMBOL(strncpy); 48EXPORT_SYMBOL(strncpy);
@@ -60,7 +60,7 @@ char *strcat(char *dest, const char *src)
60 "testb %%al,%%al\n\t" 60 "testb %%al,%%al\n\t"
61 "jne 1b" 61 "jne 1b"
62 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) 62 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
63 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory"); 63 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
64 return dest; 64 return dest;
65} 65}
66EXPORT_SYMBOL(strcat); 66EXPORT_SYMBOL(strcat);
@@ -105,9 +105,9 @@ int strcmp(const char *cs, const char *ct)
105 "2:\tsbbl %%eax,%%eax\n\t" 105 "2:\tsbbl %%eax,%%eax\n\t"
106 "orb $1,%%al\n" 106 "orb $1,%%al\n"
107 "3:" 107 "3:"
108 :"=a" (res), "=&S" (d0), "=&D" (d1) 108 : "=a" (res), "=&S" (d0), "=&D" (d1)
109 :"1" (cs), "2" (ct) 109 : "1" (cs), "2" (ct)
110 :"memory"); 110 : "memory");
111 return res; 111 return res;
112} 112}
113EXPORT_SYMBOL(strcmp); 113EXPORT_SYMBOL(strcmp);
@@ -130,9 +130,9 @@ int strncmp(const char *cs, const char *ct, size_t count)
130 "3:\tsbbl %%eax,%%eax\n\t" 130 "3:\tsbbl %%eax,%%eax\n\t"
131 "orb $1,%%al\n" 131 "orb $1,%%al\n"
132 "4:" 132 "4:"
133 :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) 133 : "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
134 :"1" (cs), "2" (ct), "3" (count) 134 : "1" (cs), "2" (ct), "3" (count)
135 :"memory"); 135 : "memory");
136 return res; 136 return res;
137} 137}
138EXPORT_SYMBOL(strncmp); 138EXPORT_SYMBOL(strncmp);
@@ -152,9 +152,9 @@ char *strchr(const char *s, int c)
152 "movl $1,%1\n" 152 "movl $1,%1\n"
153 "2:\tmovl %1,%0\n\t" 153 "2:\tmovl %1,%0\n\t"
154 "decl %0" 154 "decl %0"
155 :"=a" (res), "=&S" (d0) 155 : "=a" (res), "=&S" (d0)
156 :"1" (s), "0" (c) 156 : "1" (s), "0" (c)
157 :"memory"); 157 : "memory");
158 return res; 158 return res;
159} 159}
160EXPORT_SYMBOL(strchr); 160EXPORT_SYMBOL(strchr);
@@ -169,9 +169,9 @@ size_t strlen(const char *s)
169 "scasb\n\t" 169 "scasb\n\t"
170 "notl %0\n\t" 170 "notl %0\n\t"
171 "decl %0" 171 "decl %0"
172 :"=c" (res), "=&D" (d0) 172 : "=c" (res), "=&D" (d0)
173 :"1" (s), "a" (0), "0" (0xffffffffu) 173 : "1" (s), "a" (0), "0" (0xffffffffu)
174 :"memory"); 174 : "memory");
175 return res; 175 return res;
176} 176}
177EXPORT_SYMBOL(strlen); 177EXPORT_SYMBOL(strlen);
@@ -189,9 +189,9 @@ void *memchr(const void *cs, int c, size_t count)
189 "je 1f\n\t" 189 "je 1f\n\t"
190 "movl $1,%0\n" 190 "movl $1,%0\n"
191 "1:\tdecl %0" 191 "1:\tdecl %0"
192 :"=D" (res), "=&c" (d0) 192 : "=D" (res), "=&c" (d0)
193 :"a" (c), "0" (cs), "1" (count) 193 : "a" (c), "0" (cs), "1" (count)
194 :"memory"); 194 : "memory");
195 return res; 195 return res;
196} 196}
197EXPORT_SYMBOL(memchr); 197EXPORT_SYMBOL(memchr);
@@ -228,9 +228,9 @@ size_t strnlen(const char *s, size_t count)
228 "cmpl $-1,%1\n\t" 228 "cmpl $-1,%1\n\t"
229 "jne 1b\n" 229 "jne 1b\n"
230 "3:\tsubl %2,%0" 230 "3:\tsubl %2,%0"
231 :"=a" (res), "=&d" (d0) 231 : "=a" (res), "=&d" (d0)
232 :"c" (s), "1" (count) 232 : "c" (s), "1" (count)
233 :"memory"); 233 : "memory");
234 return res; 234 return res;
235} 235}
236EXPORT_SYMBOL(strnlen); 236EXPORT_SYMBOL(strnlen);
diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c
index 42e8a50303f3..8e2d55f754bf 100644
--- a/arch/x86/lib/strstr_32.c
+++ b/arch/x86/lib/strstr_32.c
@@ -23,9 +23,9 @@ __asm__ __volatile__(
23 "jne 1b\n\t" 23 "jne 1b\n\t"
24 "xorl %%eax,%%eax\n\t" 24 "xorl %%eax,%%eax\n\t"
25 "2:" 25 "2:"
26 :"=a" (__res), "=&c" (d0), "=&S" (d1) 26 : "=a" (__res), "=&c" (d0), "=&S" (d1)
27 :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) 27 : "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
28 :"dx", "di"); 28 : "dx", "di");
29return __res; 29return __res;
30} 30}
31 31
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3d317836be9e..3f2cf11f201a 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,13 +10,15 @@
10#include <asm/e820.h> 10#include <asm/e820.h>
11#include <asm/setup.h> 11#include <asm/setup.h>
12 12
13#include <mach_ipi.h>
14
13#ifdef CONFIG_HOTPLUG_CPU 15#ifdef CONFIG_HOTPLUG_CPU
14#define DEFAULT_SEND_IPI (1) 16#define DEFAULT_SEND_IPI (1)
15#else 17#else
16#define DEFAULT_SEND_IPI (0) 18#define DEFAULT_SEND_IPI (0)
17#endif 19#endif
18 20
19int no_broadcast=DEFAULT_SEND_IPI; 21int no_broadcast = DEFAULT_SEND_IPI;
20 22
21/** 23/**
22 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors 24 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 62fa440678d8..847c164725f4 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -328,7 +328,7 @@ void __init initmem_init(unsigned long start_pfn,
328 328
329 get_memcfg_numa(); 329 get_memcfg_numa();
330 330
331 kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); 331 kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
332 332
333 kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); 333 kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
334 do { 334 do {
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a20d1fa64b4e..e7277cbcfb40 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
148 * we have now. "break" is either changing perms, levels or 148 * we have now. "break" is either changing perms, levels or
149 * address space marker. 149 * address space marker.
150 */ 150 */
151 prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK); 151 prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
152 cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK); 152 cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
153 153
154 if (!st->level) { 154 if (!st->level) {
155 /* First entry */ 155 /* First entry */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b42..8f92cac4e6db 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm-generic/sections.h> 37#include <asm-generic/sections.h>
38#include <asm/traps.h>
38 39
39/* 40/*
40 * Page fault error code bits 41 * Page fault error code bits
@@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
357 return 0; 358 return 0;
358} 359}
359 360
360void do_invalid_op(struct pt_regs *, unsigned long);
361
362static int is_f00f_bug(struct pt_regs *regs, unsigned long address) 361static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
363{ 362{
364#ifdef CONFIG_X86_F00F_BUG 363#ifdef CONFIG_X86_F00F_BUG
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d37f29376b0c..c3789bb19308 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
47#include <asm/paravirt.h> 47#include <asm/paravirt.h>
48#include <asm/setup.h> 48#include <asm/setup.h>
49#include <asm/cacheflush.h> 49#include <asm/cacheflush.h>
50#include <asm/smp.h>
50 51
51unsigned int __VMALLOC_RESERVE = 128 << 20; 52unsigned int __VMALLOC_RESERVE = 128 << 20;
52 53
@@ -194,11 +195,30 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
194 pgd_t *pgd; 195 pgd_t *pgd;
195 pmd_t *pmd; 196 pmd_t *pmd;
196 pte_t *pte; 197 pte_t *pte;
197 unsigned pages_2m = 0, pages_4k = 0; 198 unsigned pages_2m, pages_4k;
199 int mapping_iter;
200
201 /*
202 * First iteration will setup identity mapping using large/small pages
203 * based on use_pse, with other attributes same as set by
204 * the early code in head_32.S
205 *
206 * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
207 * as desired for the kernel identity mapping.
208 *
209 * This two pass mechanism conforms to the TLB app note which says:
210 *
211 * "Software should not write to a paging-structure entry in a way
212 * that would change, for any linear address, both the page size
213 * and either the page frame or attributes."
214 */
215 mapping_iter = 1;
198 216
199 if (!cpu_has_pse) 217 if (!cpu_has_pse)
200 use_pse = 0; 218 use_pse = 0;
201 219
220repeat:
221 pages_2m = pages_4k = 0;
202 pfn = start_pfn; 222 pfn = start_pfn;
203 pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); 223 pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
204 pgd = pgd_base + pgd_idx; 224 pgd = pgd_base + pgd_idx;
@@ -224,6 +244,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
224 if (use_pse) { 244 if (use_pse) {
225 unsigned int addr2; 245 unsigned int addr2;
226 pgprot_t prot = PAGE_KERNEL_LARGE; 246 pgprot_t prot = PAGE_KERNEL_LARGE;
247 /*
248 * first pass will use the same initial
249 * identity mapping attribute + _PAGE_PSE.
250 */
251 pgprot_t init_prot =
252 __pgprot(PTE_IDENT_ATTR |
253 _PAGE_PSE);
227 254
228 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + 255 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
229 PAGE_OFFSET + PAGE_SIZE-1; 256 PAGE_OFFSET + PAGE_SIZE-1;
@@ -233,7 +260,10 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
233 prot = PAGE_KERNEL_LARGE_EXEC; 260 prot = PAGE_KERNEL_LARGE_EXEC;
234 261
235 pages_2m++; 262 pages_2m++;
236 set_pmd(pmd, pfn_pmd(pfn, prot)); 263 if (mapping_iter == 1)
264 set_pmd(pmd, pfn_pmd(pfn, init_prot));
265 else
266 set_pmd(pmd, pfn_pmd(pfn, prot));
237 267
238 pfn += PTRS_PER_PTE; 268 pfn += PTRS_PER_PTE;
239 continue; 269 continue;
@@ -245,17 +275,43 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
245 for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn; 275 for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
246 pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { 276 pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
247 pgprot_t prot = PAGE_KERNEL; 277 pgprot_t prot = PAGE_KERNEL;
278 /*
279 * first pass will use the same initial
280 * identity mapping attribute.
281 */
282 pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
248 283
249 if (is_kernel_text(addr)) 284 if (is_kernel_text(addr))
250 prot = PAGE_KERNEL_EXEC; 285 prot = PAGE_KERNEL_EXEC;
251 286
252 pages_4k++; 287 pages_4k++;
253 set_pte(pte, pfn_pte(pfn, prot)); 288 if (mapping_iter == 1)
289 set_pte(pte, pfn_pte(pfn, init_prot));
290 else
291 set_pte(pte, pfn_pte(pfn, prot));
254 } 292 }
255 } 293 }
256 } 294 }
257 update_page_count(PG_LEVEL_2M, pages_2m); 295 if (mapping_iter == 1) {
258 update_page_count(PG_LEVEL_4K, pages_4k); 296 /*
297 * update direct mapping page count only in the first
298 * iteration.
299 */
300 update_page_count(PG_LEVEL_2M, pages_2m);
301 update_page_count(PG_LEVEL_4K, pages_4k);
302
303 /*
304 * local global flush tlb, which will flush the previous
305 * mappings present in both small and large page TLB's.
306 */
307 __flush_tlb_all();
308
309 /*
310 * Second iteration will set the actual desired PTE attributes.
311 */
312 mapping_iter = 2;
313 goto repeat;
314 }
259} 315}
260 316
261/* 317/*
@@ -458,11 +514,7 @@ static void __init pagetable_init(void)
458{ 514{
459 pgd_t *pgd_base = swapper_pg_dir; 515 pgd_t *pgd_base = swapper_pg_dir;
460 516
461 paravirt_pagetable_setup_start(pgd_base);
462
463 permanent_kmaps_init(pgd_base); 517 permanent_kmaps_init(pgd_base);
464
465 paravirt_pagetable_setup_done(pgd_base);
466} 518}
467 519
468#ifdef CONFIG_ACPI_SLEEP 520#ifdef CONFIG_ACPI_SLEEP
@@ -722,7 +774,7 @@ void __init setup_bootmem_allocator(void)
722 after_init_bootmem = 1; 774 after_init_bootmem = 1;
723} 775}
724 776
725static void __init find_early_table_space(unsigned long end) 777static void __init find_early_table_space(unsigned long end, int use_pse)
726{ 778{
727 unsigned long puds, pmds, ptes, tables, start; 779 unsigned long puds, pmds, ptes, tables, start;
728 780
@@ -732,7 +784,7 @@ static void __init find_early_table_space(unsigned long end)
732 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 784 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
733 tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); 785 tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
734 786
735 if (cpu_has_pse) { 787 if (use_pse) {
736 unsigned long extra; 788 unsigned long extra;
737 789
738 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); 790 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
@@ -772,12 +824,22 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
772 pgd_t *pgd_base = swapper_pg_dir; 824 pgd_t *pgd_base = swapper_pg_dir;
773 unsigned long start_pfn, end_pfn; 825 unsigned long start_pfn, end_pfn;
774 unsigned long big_page_start; 826 unsigned long big_page_start;
827#ifdef CONFIG_DEBUG_PAGEALLOC
828 /*
829 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
830 * This will simplify cpa(), which otherwise needs to support splitting
831 * large pages into small in interrupt context, etc.
832 */
833 int use_pse = 0;
834#else
835 int use_pse = cpu_has_pse;
836#endif
775 837
776 /* 838 /*
777 * Find space for the kernel direct mapping tables. 839 * Find space for the kernel direct mapping tables.
778 */ 840 */
779 if (!after_init_bootmem) 841 if (!after_init_bootmem)
780 find_early_table_space(end); 842 find_early_table_space(end, use_pse);
781 843
782#ifdef CONFIG_X86_PAE 844#ifdef CONFIG_X86_PAE
783 set_nx(); 845 set_nx();
@@ -823,7 +885,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
823 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); 885 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
824 if (start_pfn < end_pfn) 886 if (start_pfn < end_pfn)
825 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 887 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
826 cpu_has_pse); 888 use_pse);
827 889
828 /* tail is not big page alignment ? */ 890 /* tail is not big page alignment ? */
829 start_pfn = end_pfn; 891 start_pfn = end_pfn;
@@ -986,7 +1048,6 @@ void __init mem_init(void)
986 if (boot_cpu_data.wp_works_ok < 0) 1048 if (boot_cpu_data.wp_works_ok < 0)
987 test_wp_bit(); 1049 test_wp_bit();
988 1050
989 cpa_init();
990 save_pg_dir(); 1051 save_pg_dir();
991 zap_low_mappings(); 1052 zap_low_mappings();
992} 1053}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d3746efb060d..fb30486c82f7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -225,7 +225,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
225void __init cleanup_highmap(void) 225void __init cleanup_highmap(void)
226{ 226{
227 unsigned long vaddr = __START_KERNEL_map; 227 unsigned long vaddr = __START_KERNEL_map;
228 unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1; 228 unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
229 pmd_t *pmd = level2_kernel_pgt; 229 pmd_t *pmd = level2_kernel_pgt;
230 pmd_t *last_pmd = pmd + PTRS_PER_PMD; 230 pmd_t *last_pmd = pmd + PTRS_PER_PMD;
231 231
@@ -271,7 +271,8 @@ static __ref void unmap_low_page(void *adr)
271} 271}
272 272
273static unsigned long __meminit 273static unsigned long __meminit
274phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) 274phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
275 pgprot_t prot)
275{ 276{
276 unsigned pages = 0; 277 unsigned pages = 0;
277 unsigned long last_map_addr = end; 278 unsigned long last_map_addr = end;
@@ -289,36 +290,43 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
289 break; 290 break;
290 } 291 }
291 292
293 /*
294 * We will re-use the existing mapping.
295 * Xen for example has some special requirements, like mapping
296 * pagetable pages as RO. So assume someone who pre-setup
297 * these mappings are more intelligent.
298 */
292 if (pte_val(*pte)) 299 if (pte_val(*pte))
293 continue; 300 continue;
294 301
295 if (0) 302 if (0)
296 printk(" pte=%p addr=%lx pte=%016lx\n", 303 printk(" pte=%p addr=%lx pte=%016lx\n",
297 pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); 304 pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
298 set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
299 last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
300 pages++; 305 pages++;
306 set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
307 last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
301 } 308 }
309
302 update_page_count(PG_LEVEL_4K, pages); 310 update_page_count(PG_LEVEL_4K, pages);
303 311
304 return last_map_addr; 312 return last_map_addr;
305} 313}
306 314
307static unsigned long __meminit 315static unsigned long __meminit
308phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) 316phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
317 pgprot_t prot)
309{ 318{
310 pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); 319 pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
311 320
312 return phys_pte_init(pte, address, end); 321 return phys_pte_init(pte, address, end, prot);
313} 322}
314 323
315static unsigned long __meminit 324static unsigned long __meminit
316phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, 325phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
317 unsigned long page_size_mask) 326 unsigned long page_size_mask, pgprot_t prot)
318{ 327{
319 unsigned long pages = 0; 328 unsigned long pages = 0;
320 unsigned long last_map_addr = end; 329 unsigned long last_map_addr = end;
321 unsigned long start = address;
322 330
323 int i = pmd_index(address); 331 int i = pmd_index(address);
324 332
@@ -326,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
326 unsigned long pte_phys; 334 unsigned long pte_phys;
327 pmd_t *pmd = pmd_page + pmd_index(address); 335 pmd_t *pmd = pmd_page + pmd_index(address);
328 pte_t *pte; 336 pte_t *pte;
337 pgprot_t new_prot = prot;
329 338
330 if (address >= end) { 339 if (address >= end) {
331 if (!after_bootmem) { 340 if (!after_bootmem) {
@@ -339,27 +348,40 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
339 if (!pmd_large(*pmd)) { 348 if (!pmd_large(*pmd)) {
340 spin_lock(&init_mm.page_table_lock); 349 spin_lock(&init_mm.page_table_lock);
341 last_map_addr = phys_pte_update(pmd, address, 350 last_map_addr = phys_pte_update(pmd, address,
342 end); 351 end, prot);
343 spin_unlock(&init_mm.page_table_lock); 352 spin_unlock(&init_mm.page_table_lock);
353 continue;
344 } 354 }
345 /* Count entries we're using from level2_ident_pgt */ 355 /*
346 if (start == 0) 356 * If we are ok with PG_LEVEL_2M mapping, then we will
347 pages++; 357 * use the existing mapping,
348 continue; 358 *
359 * Otherwise, we will split the large page mapping but
360 * use the same existing protection bits except for
361 * large page, so that we don't violate Intel's TLB
362 * Application note (317080) which says, while changing
363 * the page sizes, new and old translations should
364 * not differ with respect to page frame and
365 * attributes.
366 */
367 if (page_size_mask & (1 << PG_LEVEL_2M))
368 continue;
369 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
349 } 370 }
350 371
351 if (page_size_mask & (1<<PG_LEVEL_2M)) { 372 if (page_size_mask & (1<<PG_LEVEL_2M)) {
352 pages++; 373 pages++;
353 spin_lock(&init_mm.page_table_lock); 374 spin_lock(&init_mm.page_table_lock);
354 set_pte((pte_t *)pmd, 375 set_pte((pte_t *)pmd,
355 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 376 pfn_pte(address >> PAGE_SHIFT,
377 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
356 spin_unlock(&init_mm.page_table_lock); 378 spin_unlock(&init_mm.page_table_lock);
357 last_map_addr = (address & PMD_MASK) + PMD_SIZE; 379 last_map_addr = (address & PMD_MASK) + PMD_SIZE;
358 continue; 380 continue;
359 } 381 }
360 382
361 pte = alloc_low_page(&pte_phys); 383 pte = alloc_low_page(&pte_phys);
362 last_map_addr = phys_pte_init(pte, address, end); 384 last_map_addr = phys_pte_init(pte, address, end, new_prot);
363 unmap_low_page(pte); 385 unmap_low_page(pte);
364 386
365 spin_lock(&init_mm.page_table_lock); 387 spin_lock(&init_mm.page_table_lock);
@@ -372,12 +394,12 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
372 394
373static unsigned long __meminit 395static unsigned long __meminit
374phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, 396phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
375 unsigned long page_size_mask) 397 unsigned long page_size_mask, pgprot_t prot)
376{ 398{
377 pmd_t *pmd = pmd_offset(pud, 0); 399 pmd_t *pmd = pmd_offset(pud, 0);
378 unsigned long last_map_addr; 400 unsigned long last_map_addr;
379 401
380 last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); 402 last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
381 __flush_tlb_all(); 403 __flush_tlb_all();
382 return last_map_addr; 404 return last_map_addr;
383} 405}
@@ -394,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
394 unsigned long pmd_phys; 416 unsigned long pmd_phys;
395 pud_t *pud = pud_page + pud_index(addr); 417 pud_t *pud = pud_page + pud_index(addr);
396 pmd_t *pmd; 418 pmd_t *pmd;
419 pgprot_t prot = PAGE_KERNEL;
397 420
398 if (addr >= end) 421 if (addr >= end)
399 break; 422 break;
@@ -405,10 +428,26 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
405 } 428 }
406 429
407 if (pud_val(*pud)) { 430 if (pud_val(*pud)) {
408 if (!pud_large(*pud)) 431 if (!pud_large(*pud)) {
409 last_map_addr = phys_pmd_update(pud, addr, end, 432 last_map_addr = phys_pmd_update(pud, addr, end,
410 page_size_mask); 433 page_size_mask, prot);
411 continue; 434 continue;
435 }
436 /*
437 * If we are ok with PG_LEVEL_1G mapping, then we will
438 * use the existing mapping.
439 *
440 * Otherwise, we will split the gbpage mapping but use
441 * the same existing protection bits except for large
442 * page, so that we don't violate Intel's TLB
443 * Application note (317080) which says, while changing
444 * the page sizes, new and old translations should
445 * not differ with respect to page frame and
446 * attributes.
447 */
448 if (page_size_mask & (1 << PG_LEVEL_1G))
449 continue;
450 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
412 } 451 }
413 452
414 if (page_size_mask & (1<<PG_LEVEL_1G)) { 453 if (page_size_mask & (1<<PG_LEVEL_1G)) {
@@ -422,7 +461,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
422 } 461 }
423 462
424 pmd = alloc_low_page(&pmd_phys); 463 pmd = alloc_low_page(&pmd_phys);
425 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); 464 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
465 prot);
426 unmap_low_page(pmd); 466 unmap_low_page(pmd);
427 467
428 spin_lock(&init_mm.page_table_lock); 468 spin_lock(&init_mm.page_table_lock);
@@ -430,6 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
430 spin_unlock(&init_mm.page_table_lock); 470 spin_unlock(&init_mm.page_table_lock);
431 } 471 }
432 __flush_tlb_all(); 472 __flush_tlb_all();
473
433 update_page_count(PG_LEVEL_1G, pages); 474 update_page_count(PG_LEVEL_1G, pages);
434 475
435 return last_map_addr; 476 return last_map_addr;
@@ -446,27 +487,28 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
446 return phys_pud_init(pud, addr, end, page_size_mask); 487 return phys_pud_init(pud, addr, end, page_size_mask);
447} 488}
448 489
449static void __init find_early_table_space(unsigned long end) 490static void __init find_early_table_space(unsigned long end, int use_pse,
491 int use_gbpages)
450{ 492{
451 unsigned long puds, pmds, ptes, tables, start; 493 unsigned long puds, pmds, ptes, tables, start;
452 494
453 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 495 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
454 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); 496 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
455 if (direct_gbpages) { 497 if (use_gbpages) {
456 unsigned long extra; 498 unsigned long extra;
457 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); 499 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
458 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; 500 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
459 } else 501 } else
460 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 502 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
461 tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); 503 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
462 504
463 if (cpu_has_pse) { 505 if (use_pse) {
464 unsigned long extra; 506 unsigned long extra;
465 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); 507 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
466 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; 508 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
467 } else 509 } else
468 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; 510 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
469 tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); 511 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
470 512
471 /* 513 /*
472 * RED-PEN putting page tables only on node 0 could 514 * RED-PEN putting page tables only on node 0 could
@@ -528,6 +570,7 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
528 pgd_populate(&init_mm, pgd, __va(pud_phys)); 570 pgd_populate(&init_mm, pgd, __va(pud_phys));
529 spin_unlock(&init_mm.page_table_lock); 571 spin_unlock(&init_mm.page_table_lock);
530 } 572 }
573 __flush_tlb_all();
531 574
532 return last_map_addr; 575 return last_map_addr;
533} 576}
@@ -571,6 +614,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
571 614
572 struct map_range mr[NR_RANGE_MR]; 615 struct map_range mr[NR_RANGE_MR];
573 int nr_range, i; 616 int nr_range, i;
617 int use_pse, use_gbpages;
574 618
575 printk(KERN_INFO "init_memory_mapping\n"); 619 printk(KERN_INFO "init_memory_mapping\n");
576 620
@@ -584,9 +628,21 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
584 if (!after_bootmem) 628 if (!after_bootmem)
585 init_gbpages(); 629 init_gbpages();
586 630
587 if (direct_gbpages) 631#ifdef CONFIG_DEBUG_PAGEALLOC
632 /*
633 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
634 * This will simplify cpa(), which otherwise needs to support splitting
635 * large pages into small in interrupt context, etc.
636 */
637 use_pse = use_gbpages = 0;
638#else
639 use_pse = cpu_has_pse;
640 use_gbpages = direct_gbpages;
641#endif
642
643 if (use_gbpages)
588 page_size_mask |= 1 << PG_LEVEL_1G; 644 page_size_mask |= 1 << PG_LEVEL_1G;
589 if (cpu_has_pse) 645 if (use_pse)
590 page_size_mask |= 1 << PG_LEVEL_2M; 646 page_size_mask |= 1 << PG_LEVEL_2M;
591 647
592 memset(mr, 0, sizeof(mr)); 648 memset(mr, 0, sizeof(mr));
@@ -647,7 +703,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
647 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); 703 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
648 704
649 if (!after_bootmem) 705 if (!after_bootmem)
650 find_early_table_space(end); 706 find_early_table_space(end, use_pse, use_gbpages);
651 707
652 for (i = 0; i < nr_range; i++) 708 for (i = 0; i < nr_range; i++)
653 last_map_addr = kernel_physical_mapping_init( 709 last_map_addr = kernel_physical_mapping_init(
@@ -806,8 +862,6 @@ void __init mem_init(void)
806 reservedpages << (PAGE_SHIFT-10), 862 reservedpages << (PAGE_SHIFT-10),
807 datasize >> 10, 863 datasize >> 10,
808 initsize >> 10); 864 initsize >> 10);
809
810 cpa_init();
811} 865}
812 866
813void free_init_pages(char *what, unsigned long begin, unsigned long end) 867void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a29ae3..6ab3196d12b4 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -83,6 +83,25 @@ int page_is_ram(unsigned long pagenr)
83 return 0; 83 return 0;
84} 84}
85 85
86int pagerange_is_ram(unsigned long start, unsigned long end)
87{
88 int ram_page = 0, not_rampage = 0;
89 unsigned long page_nr;
90
91 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
92 ++page_nr) {
93 if (page_is_ram(page_nr))
94 ram_page = 1;
95 else
96 not_rampage = 1;
97
98 if (ram_page == not_rampage)
99 return -1;
100 }
101
102 return ram_page;
103}
104
86/* 105/*
87 * Fix up the linear direct mapping of the kernel to avoid cache attribute 106 * Fix up the linear direct mapping of the kernel to avoid cache attribute
88 * conflicts. 107 * conflicts.
@@ -421,7 +440,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
421 return; 440 return;
422} 441}
423 442
424int __initdata early_ioremap_debug; 443static int __initdata early_ioremap_debug;
425 444
426static int __init early_ioremap_debug_setup(char *str) 445static int __init early_ioremap_debug_setup(char *str)
427{ 446{
@@ -547,7 +566,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
547} 566}
548 567
549 568
550int __initdata early_ioremap_nested; 569static int __initdata early_ioremap_nested;
551 570
552static int __init check_early_ioremap_leak(void) 571static int __init check_early_ioremap_leak(void)
553{ 572{
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a4dd793d6003..cebcbf152d46 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -79,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
79 return 0; 79 return 0;
80 80
81 addr = 0x8000; 81 addr = 0x8000;
82 nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); 82 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
83 nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT, 83 nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
84 nodemap_size, L1_CACHE_BYTES); 84 nodemap_size, L1_CACHE_BYTES);
85 if (nodemap_addr == -1UL) { 85 if (nodemap_addr == -1UL) {
@@ -176,10 +176,10 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
176 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 176 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
177 unsigned long bootmap_start, nodedata_phys; 177 unsigned long bootmap_start, nodedata_phys;
178 void *bootmap; 178 void *bootmap;
179 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); 179 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
180 int nid; 180 int nid;
181 181
182 start = round_up(start, ZONE_ALIGN); 182 start = roundup(start, ZONE_ALIGN);
183 183
184 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 184 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
185 start, end); 185 start, end);
@@ -210,9 +210,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
210 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); 210 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
211 nid = phys_to_nid(nodedata_phys); 211 nid = phys_to_nid(nodedata_phys);
212 if (nid == nodeid) 212 if (nid == nodeid)
213 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); 213 bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
214 else 214 else
215 bootmap_start = round_up(start, PAGE_SIZE); 215 bootmap_start = roundup(start, PAGE_SIZE);
216 /* 216 /*
217 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like 217 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
218 * to use that to align to PAGE_SIZE 218 * to use that to align to PAGE_SIZE
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index d4aa503caaa2..e1d106909218 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -32,7 +32,7 @@ enum {
32 GPS = (1<<30) 32 GPS = (1<<30)
33}; 33};
34 34
35#define PAGE_TESTBIT __pgprot(_PAGE_UNUSED1) 35#define PAGE_CPA_TEST __pgprot(_PAGE_CPA_TEST)
36 36
37static int pte_testbit(pte_t pte) 37static int pte_testbit(pte_t pte)
38{ 38{
@@ -118,6 +118,7 @@ static int pageattr_test(void)
118 unsigned int level; 118 unsigned int level;
119 int i, k; 119 int i, k;
120 int err; 120 int err;
121 unsigned long test_addr;
121 122
122 if (print) 123 if (print)
123 printk(KERN_INFO "CPA self-test:\n"); 124 printk(KERN_INFO "CPA self-test:\n");
@@ -172,7 +173,8 @@ static int pageattr_test(void)
172 continue; 173 continue;
173 } 174 }
174 175
175 err = change_page_attr_set(addr[i], len[i], PAGE_TESTBIT); 176 test_addr = addr[i];
177 err = change_page_attr_set(&test_addr, len[i], PAGE_CPA_TEST, 0);
176 if (err < 0) { 178 if (err < 0) {
177 printk(KERN_ERR "CPA %d failed %d\n", i, err); 179 printk(KERN_ERR "CPA %d failed %d\n", i, err);
178 failed++; 180 failed++;
@@ -204,7 +206,8 @@ static int pageattr_test(void)
204 failed++; 206 failed++;
205 continue; 207 continue;
206 } 208 }
207 err = change_page_attr_clear(addr[i], len[i], PAGE_TESTBIT); 209 test_addr = addr[i];
210 err = change_page_attr_clear(&test_addr, len[i], PAGE_CPA_TEST, 0);
208 if (err < 0) { 211 if (err < 0) {
209 printk(KERN_ERR "CPA reverting failed: %d\n", err); 212 printk(KERN_ERR "CPA reverting failed: %d\n", err);
210 failed++; 213 failed++;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 43e2f8483e4f..a9ec89c3fbca 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -25,15 +25,27 @@
25 * The current flushing context - we pass it instead of 5 arguments: 25 * The current flushing context - we pass it instead of 5 arguments:
26 */ 26 */
27struct cpa_data { 27struct cpa_data {
28 unsigned long vaddr; 28 unsigned long *vaddr;
29 pgprot_t mask_set; 29 pgprot_t mask_set;
30 pgprot_t mask_clr; 30 pgprot_t mask_clr;
31 int numpages; 31 int numpages;
32 int flushtlb; 32 int flags;
33 unsigned long pfn; 33 unsigned long pfn;
34 unsigned force_split : 1; 34 unsigned force_split : 1;
35 int curpage;
35}; 36};
36 37
38/*
39 * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
40 * using cpa_lock. So that we don't allow any other cpu, with stale large tlb
41 * entries change the page attribute in parallel to some other cpu
42 * splitting a large page entry along with changing the attribute.
43 */
44static DEFINE_SPINLOCK(cpa_lock);
45
46#define CPA_FLUSHTLB 1
47#define CPA_ARRAY 2
48
37#ifdef CONFIG_PROC_FS 49#ifdef CONFIG_PROC_FS
38static unsigned long direct_pages_count[PG_LEVEL_NUM]; 50static unsigned long direct_pages_count[PG_LEVEL_NUM];
39 51
@@ -84,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
84 96
85static inline unsigned long highmap_end_pfn(void) 97static inline unsigned long highmap_end_pfn(void)
86{ 98{
87 return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; 99 return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
88} 100}
89 101
90#endif 102#endif
@@ -190,6 +202,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
190 } 202 }
191} 203}
192 204
205static void cpa_flush_array(unsigned long *start, int numpages, int cache)
206{
207 unsigned int i, level;
208 unsigned long *addr;
209
210 BUG_ON(irqs_disabled());
211
212 on_each_cpu(__cpa_flush_range, NULL, 1);
213
214 if (!cache)
215 return;
216
217 /* 4M threshold */
218 if (numpages >= 1024) {
219 if (boot_cpu_data.x86_model >= 4)
220 wbinvd();
221 return;
222 }
223 /*
224 * We only need to flush on one CPU,
225 * clflush is a MESI-coherent instruction that
226 * will cause all other CPUs to flush the same
227 * cachelines:
228 */
229 for (i = 0, addr = start; i < numpages; i++, addr++) {
230 pte_t *pte = lookup_address(*addr, &level);
231
232 /*
233 * Only flush present addresses:
234 */
235 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
236 clflush_cache_range((void *) *addr, PAGE_SIZE);
237 }
238}
239
193/* 240/*
194 * Certain areas of memory on x86 require very specific protection flags, 241 * Certain areas of memory on x86 require very specific protection flags,
195 * for example the BIOS area or kernel text. Callers don't always get this 242 * for example the BIOS area or kernel text. Callers don't always get this
@@ -398,7 +445,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
398 */ 445 */
399 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); 446 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
400 __set_pmd_pte(kpte, address, new_pte); 447 __set_pmd_pte(kpte, address, new_pte);
401 cpa->flushtlb = 1; 448 cpa->flags |= CPA_FLUSHTLB;
402 do_split = 0; 449 do_split = 0;
403 } 450 }
404 451
@@ -408,84 +455,6 @@ out_unlock:
408 return do_split; 455 return do_split;
409} 456}
410 457
411static LIST_HEAD(page_pool);
412static unsigned long pool_size, pool_pages, pool_low;
413static unsigned long pool_used, pool_failed;
414
415static void cpa_fill_pool(struct page **ret)
416{
417 gfp_t gfp = GFP_KERNEL;
418 unsigned long flags;
419 struct page *p;
420
421 /*
422 * Avoid recursion (on debug-pagealloc) and also signal
423 * our priority to get to these pagetables:
424 */
425 if (current->flags & PF_MEMALLOC)
426 return;
427 current->flags |= PF_MEMALLOC;
428
429 /*
430 * Allocate atomically from atomic contexts:
431 */
432 if (in_atomic() || irqs_disabled() || debug_pagealloc)
433 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
434
435 while (pool_pages < pool_size || (ret && !*ret)) {
436 p = alloc_pages(gfp, 0);
437 if (!p) {
438 pool_failed++;
439 break;
440 }
441 /*
442 * If the call site needs a page right now, provide it:
443 */
444 if (ret && !*ret) {
445 *ret = p;
446 continue;
447 }
448 spin_lock_irqsave(&pgd_lock, flags);
449 list_add(&p->lru, &page_pool);
450 pool_pages++;
451 spin_unlock_irqrestore(&pgd_lock, flags);
452 }
453
454 current->flags &= ~PF_MEMALLOC;
455}
456
457#define SHIFT_MB (20 - PAGE_SHIFT)
458#define ROUND_MB_GB ((1 << 10) - 1)
459#define SHIFT_MB_GB 10
460#define POOL_PAGES_PER_GB 16
461
462void __init cpa_init(void)
463{
464 struct sysinfo si;
465 unsigned long gb;
466
467 si_meminfo(&si);
468 /*
469 * Calculate the number of pool pages:
470 *
471 * Convert totalram (nr of pages) to MiB and round to the next
472 * GiB. Shift MiB to Gib and multiply the result by
473 * POOL_PAGES_PER_GB:
474 */
475 if (debug_pagealloc) {
476 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
477 pool_size = POOL_PAGES_PER_GB * gb;
478 } else {
479 pool_size = 1;
480 }
481 pool_low = pool_size;
482
483 cpa_fill_pool(NULL);
484 printk(KERN_DEBUG
485 "CPA: page pool initialized %lu of %lu pages preallocated\n",
486 pool_pages, pool_size);
487}
488
489static int split_large_page(pte_t *kpte, unsigned long address) 458static int split_large_page(pte_t *kpte, unsigned long address)
490{ 459{
491 unsigned long flags, pfn, pfninc = 1; 460 unsigned long flags, pfn, pfninc = 1;
@@ -494,28 +463,15 @@ static int split_large_page(pte_t *kpte, unsigned long address)
494 pgprot_t ref_prot; 463 pgprot_t ref_prot;
495 struct page *base; 464 struct page *base;
496 465
497 /* 466 if (!debug_pagealloc)
498 * Get a page from the pool. The pool list is protected by the 467 spin_unlock(&cpa_lock);
499 * pgd_lock, which we have to take anyway for the split 468 base = alloc_pages(GFP_KERNEL, 0);
500 * operation: 469 if (!debug_pagealloc)
501 */ 470 spin_lock(&cpa_lock);
502 spin_lock_irqsave(&pgd_lock, flags); 471 if (!base)
503 if (list_empty(&page_pool)) { 472 return -ENOMEM;
504 spin_unlock_irqrestore(&pgd_lock, flags);
505 base = NULL;
506 cpa_fill_pool(&base);
507 if (!base)
508 return -ENOMEM;
509 spin_lock_irqsave(&pgd_lock, flags);
510 } else {
511 base = list_first_entry(&page_pool, struct page, lru);
512 list_del(&base->lru);
513 pool_pages--;
514
515 if (pool_pages < pool_low)
516 pool_low = pool_pages;
517 }
518 473
474 spin_lock_irqsave(&pgd_lock, flags);
519 /* 475 /*
520 * Check for races, another CPU might have split this page 476 * Check for races, another CPU might have split this page
521 * up for us already: 477 * up for us already:
@@ -572,11 +528,8 @@ out_unlock:
572 * If we dropped out via the lookup_address check under 528 * If we dropped out via the lookup_address check under
573 * pgd_lock then stick the page back into the pool: 529 * pgd_lock then stick the page back into the pool:
574 */ 530 */
575 if (base) { 531 if (base)
576 list_add(&base->lru, &page_pool); 532 __free_page(base);
577 pool_pages++;
578 } else
579 pool_used++;
580 spin_unlock_irqrestore(&pgd_lock, flags); 533 spin_unlock_irqrestore(&pgd_lock, flags);
581 534
582 return 0; 535 return 0;
@@ -584,11 +537,16 @@ out_unlock:
584 537
585static int __change_page_attr(struct cpa_data *cpa, int primary) 538static int __change_page_attr(struct cpa_data *cpa, int primary)
586{ 539{
587 unsigned long address = cpa->vaddr; 540 unsigned long address;
588 int do_split, err; 541 int do_split, err;
589 unsigned int level; 542 unsigned int level;
590 pte_t *kpte, old_pte; 543 pte_t *kpte, old_pte;
591 544
545 if (cpa->flags & CPA_ARRAY)
546 address = cpa->vaddr[cpa->curpage];
547 else
548 address = *cpa->vaddr;
549
592repeat: 550repeat:
593 kpte = lookup_address(address, &level); 551 kpte = lookup_address(address, &level);
594 if (!kpte) 552 if (!kpte)
@@ -600,7 +558,7 @@ repeat:
600 return 0; 558 return 0;
601 WARN(1, KERN_WARNING "CPA: called for zero pte. " 559 WARN(1, KERN_WARNING "CPA: called for zero pte. "
602 "vaddr = %lx cpa->vaddr = %lx\n", address, 560 "vaddr = %lx cpa->vaddr = %lx\n", address,
603 cpa->vaddr); 561 *cpa->vaddr);
604 return -EINVAL; 562 return -EINVAL;
605 } 563 }
606 564
@@ -626,7 +584,7 @@ repeat:
626 */ 584 */
627 if (pte_val(old_pte) != pte_val(new_pte)) { 585 if (pte_val(old_pte) != pte_val(new_pte)) {
628 set_pte_atomic(kpte, new_pte); 586 set_pte_atomic(kpte, new_pte);
629 cpa->flushtlb = 1; 587 cpa->flags |= CPA_FLUSHTLB;
630 } 588 }
631 cpa->numpages = 1; 589 cpa->numpages = 1;
632 return 0; 590 return 0;
@@ -650,7 +608,25 @@ repeat:
650 */ 608 */
651 err = split_large_page(kpte, address); 609 err = split_large_page(kpte, address);
652 if (!err) { 610 if (!err) {
653 cpa->flushtlb = 1; 611 /*
612 * Do a global flush tlb after splitting the large page
613 * and before we do the actual change page attribute in the PTE.
614 *
615 * With out this, we violate the TLB application note, that says
616 * "The TLBs may contain both ordinary and large-page
617 * translations for a 4-KByte range of linear addresses. This
618 * may occur if software modifies the paging structures so that
619 * the page size used for the address range changes. If the two
620 * translations differ with respect to page frame or attributes
621 * (e.g., permissions), processor behavior is undefined and may
622 * be implementation-specific."
623 *
624 * We do this global tlb flush inside the cpa_lock, so that we
625 * don't allow any other cpu, with stale tlb entries change the
626 * page attribute in parallel, that also falls into the
627 * just split large page entry.
628 */
629 flush_tlb_all();
654 goto repeat; 630 goto repeat;
655 } 631 }
656 632
@@ -663,6 +639,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
663{ 639{
664 struct cpa_data alias_cpa; 640 struct cpa_data alias_cpa;
665 int ret = 0; 641 int ret = 0;
642 unsigned long temp_cpa_vaddr, vaddr;
666 643
667 if (cpa->pfn >= max_pfn_mapped) 644 if (cpa->pfn >= max_pfn_mapped)
668 return 0; 645 return 0;
@@ -675,16 +652,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
675 * No need to redo, when the primary call touched the direct 652 * No need to redo, when the primary call touched the direct
676 * mapping already: 653 * mapping already:
677 */ 654 */
678 if (!(within(cpa->vaddr, PAGE_OFFSET, 655 if (cpa->flags & CPA_ARRAY)
656 vaddr = cpa->vaddr[cpa->curpage];
657 else
658 vaddr = *cpa->vaddr;
659
660 if (!(within(vaddr, PAGE_OFFSET,
679 PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) 661 PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
680#ifdef CONFIG_X86_64 662#ifdef CONFIG_X86_64
681 || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32), 663 || within(vaddr, PAGE_OFFSET + (1UL<<32),
682 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) 664 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
683#endif 665#endif
684 )) { 666 )) {
685 667
686 alias_cpa = *cpa; 668 alias_cpa = *cpa;
687 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 669 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
670 alias_cpa.vaddr = &temp_cpa_vaddr;
671 alias_cpa.flags &= ~CPA_ARRAY;
672
688 673
689 ret = __change_page_attr_set_clr(&alias_cpa, 0); 674 ret = __change_page_attr_set_clr(&alias_cpa, 0);
690 } 675 }
@@ -696,7 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
696 * No need to redo, when the primary call touched the high 681 * No need to redo, when the primary call touched the high
697 * mapping already: 682 * mapping already:
698 */ 683 */
699 if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end)) 684 if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
700 return 0; 685 return 0;
701 686
702 /* 687 /*
@@ -707,8 +692,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
707 return 0; 692 return 0;
708 693
709 alias_cpa = *cpa; 694 alias_cpa = *cpa;
710 alias_cpa.vaddr = 695 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
711 (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; 696 alias_cpa.vaddr = &temp_cpa_vaddr;
697 alias_cpa.flags &= ~CPA_ARRAY;
712 698
713 /* 699 /*
714 * The high mapping range is imprecise, so ignore the return value. 700 * The high mapping range is imprecise, so ignore the return value.
@@ -728,8 +714,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
728 * preservation check. 714 * preservation check.
729 */ 715 */
730 cpa->numpages = numpages; 716 cpa->numpages = numpages;
717 /* for array changes, we can't use large page */
718 if (cpa->flags & CPA_ARRAY)
719 cpa->numpages = 1;
731 720
721 if (!debug_pagealloc)
722 spin_lock(&cpa_lock);
732 ret = __change_page_attr(cpa, checkalias); 723 ret = __change_page_attr(cpa, checkalias);
724 if (!debug_pagealloc)
725 spin_unlock(&cpa_lock);
733 if (ret) 726 if (ret)
734 return ret; 727 return ret;
735 728
@@ -746,7 +739,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
746 */ 739 */
747 BUG_ON(cpa->numpages > numpages); 740 BUG_ON(cpa->numpages > numpages);
748 numpages -= cpa->numpages; 741 numpages -= cpa->numpages;
749 cpa->vaddr += cpa->numpages * PAGE_SIZE; 742 if (cpa->flags & CPA_ARRAY)
743 cpa->curpage++;
744 else
745 *cpa->vaddr += cpa->numpages * PAGE_SIZE;
746
750 } 747 }
751 return 0; 748 return 0;
752} 749}
@@ -757,9 +754,9 @@ static inline int cache_attr(pgprot_t attr)
757 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); 754 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
758} 755}
759 756
760static int change_page_attr_set_clr(unsigned long addr, int numpages, 757static int change_page_attr_set_clr(unsigned long *addr, int numpages,
761 pgprot_t mask_set, pgprot_t mask_clr, 758 pgprot_t mask_set, pgprot_t mask_clr,
762 int force_split) 759 int force_split, int array)
763{ 760{
764 struct cpa_data cpa; 761 struct cpa_data cpa;
765 int ret, cache, checkalias; 762 int ret, cache, checkalias;
@@ -774,21 +771,38 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
774 return 0; 771 return 0;
775 772
776 /* Ensure we are PAGE_SIZE aligned */ 773 /* Ensure we are PAGE_SIZE aligned */
777 if (addr & ~PAGE_MASK) { 774 if (!array) {
778 addr &= PAGE_MASK; 775 if (*addr & ~PAGE_MASK) {
779 /* 776 *addr &= PAGE_MASK;
780 * People should not be passing in unaligned addresses: 777 /*
781 */ 778 * People should not be passing in unaligned addresses:
782 WARN_ON_ONCE(1); 779 */
780 WARN_ON_ONCE(1);
781 }
782 } else {
783 int i;
784 for (i = 0; i < numpages; i++) {
785 if (addr[i] & ~PAGE_MASK) {
786 addr[i] &= PAGE_MASK;
787 WARN_ON_ONCE(1);
788 }
789 }
783 } 790 }
784 791
792 /* Must avoid aliasing mappings in the highmem code */
793 kmap_flush_unused();
794
785 cpa.vaddr = addr; 795 cpa.vaddr = addr;
786 cpa.numpages = numpages; 796 cpa.numpages = numpages;
787 cpa.mask_set = mask_set; 797 cpa.mask_set = mask_set;
788 cpa.mask_clr = mask_clr; 798 cpa.mask_clr = mask_clr;
789 cpa.flushtlb = 0; 799 cpa.flags = 0;
800 cpa.curpage = 0;
790 cpa.force_split = force_split; 801 cpa.force_split = force_split;
791 802
803 if (array)
804 cpa.flags |= CPA_ARRAY;
805
792 /* No alias checking for _NX bit modifications */ 806 /* No alias checking for _NX bit modifications */
793 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 807 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
794 808
@@ -797,7 +811,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
797 /* 811 /*
798 * Check whether we really changed something: 812 * Check whether we really changed something:
799 */ 813 */
800 if (!cpa.flushtlb) 814 if (!(cpa.flags & CPA_FLUSHTLB))
801 goto out; 815 goto out;
802 816
803 /* 817 /*
@@ -812,27 +826,30 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
812 * error case we fall back to cpa_flush_all (which uses 826 * error case we fall back to cpa_flush_all (which uses
813 * wbindv): 827 * wbindv):
814 */ 828 */
815 if (!ret && cpu_has_clflush) 829 if (!ret && cpu_has_clflush) {
816 cpa_flush_range(addr, numpages, cache); 830 if (cpa.flags & CPA_ARRAY)
817 else 831 cpa_flush_array(addr, numpages, cache);
832 else
833 cpa_flush_range(*addr, numpages, cache);
834 } else
818 cpa_flush_all(cache); 835 cpa_flush_all(cache);
819 836
820out: 837out:
821 cpa_fill_pool(NULL);
822
823 return ret; 838 return ret;
824} 839}
825 840
826static inline int change_page_attr_set(unsigned long addr, int numpages, 841static inline int change_page_attr_set(unsigned long *addr, int numpages,
827 pgprot_t mask) 842 pgprot_t mask, int array)
828{ 843{
829 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0); 844 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
845 array);
830} 846}
831 847
832static inline int change_page_attr_clear(unsigned long addr, int numpages, 848static inline int change_page_attr_clear(unsigned long *addr, int numpages,
833 pgprot_t mask) 849 pgprot_t mask, int array)
834{ 850{
835 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0); 851 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
852 array);
836} 853}
837 854
838int _set_memory_uc(unsigned long addr, int numpages) 855int _set_memory_uc(unsigned long addr, int numpages)
@@ -840,8 +857,8 @@ int _set_memory_uc(unsigned long addr, int numpages)
840 /* 857 /*
841 * for now UC MINUS. see comments in ioremap_nocache() 858 * for now UC MINUS. see comments in ioremap_nocache()
842 */ 859 */
843 return change_page_attr_set(addr, numpages, 860 return change_page_attr_set(&addr, numpages,
844 __pgprot(_PAGE_CACHE_UC_MINUS)); 861 __pgprot(_PAGE_CACHE_UC_MINUS), 0);
845} 862}
846 863
847int set_memory_uc(unsigned long addr, int numpages) 864int set_memory_uc(unsigned long addr, int numpages)
@@ -857,10 +874,48 @@ int set_memory_uc(unsigned long addr, int numpages)
857} 874}
858EXPORT_SYMBOL(set_memory_uc); 875EXPORT_SYMBOL(set_memory_uc);
859 876
877int set_memory_array_uc(unsigned long *addr, int addrinarray)
878{
879 unsigned long start;
880 unsigned long end;
881 int i;
882 /*
883 * for now UC MINUS. see comments in ioremap_nocache()
884 */
885 for (i = 0; i < addrinarray; i++) {
886 start = __pa(addr[i]);
887 for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
888 if (end != __pa(addr[i + 1]))
889 break;
890 i++;
891 }
892 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
893 goto out;
894 }
895
896 return change_page_attr_set(addr, addrinarray,
897 __pgprot(_PAGE_CACHE_UC_MINUS), 1);
898out:
899 for (i = 0; i < addrinarray; i++) {
900 unsigned long tmp = __pa(addr[i]);
901
902 if (tmp == start)
903 break;
904 for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
905 if (end != __pa(addr[i + 1]))
906 break;
907 i++;
908 }
909 free_memtype(tmp, end);
910 }
911 return -EINVAL;
912}
913EXPORT_SYMBOL(set_memory_array_uc);
914
860int _set_memory_wc(unsigned long addr, int numpages) 915int _set_memory_wc(unsigned long addr, int numpages)
861{ 916{
862 return change_page_attr_set(addr, numpages, 917 return change_page_attr_set(&addr, numpages,
863 __pgprot(_PAGE_CACHE_WC)); 918 __pgprot(_PAGE_CACHE_WC), 0);
864} 919}
865 920
866int set_memory_wc(unsigned long addr, int numpages) 921int set_memory_wc(unsigned long addr, int numpages)
@@ -878,8 +933,8 @@ EXPORT_SYMBOL(set_memory_wc);
878 933
879int _set_memory_wb(unsigned long addr, int numpages) 934int _set_memory_wb(unsigned long addr, int numpages)
880{ 935{
881 return change_page_attr_clear(addr, numpages, 936 return change_page_attr_clear(&addr, numpages,
882 __pgprot(_PAGE_CACHE_MASK)); 937 __pgprot(_PAGE_CACHE_MASK), 0);
883} 938}
884 939
885int set_memory_wb(unsigned long addr, int numpages) 940int set_memory_wb(unsigned long addr, int numpages)
@@ -890,37 +945,59 @@ int set_memory_wb(unsigned long addr, int numpages)
890} 945}
891EXPORT_SYMBOL(set_memory_wb); 946EXPORT_SYMBOL(set_memory_wb);
892 947
948int set_memory_array_wb(unsigned long *addr, int addrinarray)
949{
950 int i;
951
952 for (i = 0; i < addrinarray; i++) {
953 unsigned long start = __pa(addr[i]);
954 unsigned long end;
955
956 for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
957 if (end != __pa(addr[i + 1]))
958 break;
959 i++;
960 }
961 free_memtype(start, end);
962 }
963 return change_page_attr_clear(addr, addrinarray,
964 __pgprot(_PAGE_CACHE_MASK), 1);
965}
966EXPORT_SYMBOL(set_memory_array_wb);
967
893int set_memory_x(unsigned long addr, int numpages) 968int set_memory_x(unsigned long addr, int numpages)
894{ 969{
895 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX)); 970 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
896} 971}
897EXPORT_SYMBOL(set_memory_x); 972EXPORT_SYMBOL(set_memory_x);
898 973
899int set_memory_nx(unsigned long addr, int numpages) 974int set_memory_nx(unsigned long addr, int numpages)
900{ 975{
901 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX)); 976 return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
902} 977}
903EXPORT_SYMBOL(set_memory_nx); 978EXPORT_SYMBOL(set_memory_nx);
904 979
905int set_memory_ro(unsigned long addr, int numpages) 980int set_memory_ro(unsigned long addr, int numpages)
906{ 981{
907 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW)); 982 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
908} 983}
984EXPORT_SYMBOL_GPL(set_memory_ro);
909 985
910int set_memory_rw(unsigned long addr, int numpages) 986int set_memory_rw(unsigned long addr, int numpages)
911{ 987{
912 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW)); 988 return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
913} 989}
990EXPORT_SYMBOL_GPL(set_memory_rw);
914 991
915int set_memory_np(unsigned long addr, int numpages) 992int set_memory_np(unsigned long addr, int numpages)
916{ 993{
917 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT)); 994 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
918} 995}
919 996
920int set_memory_4k(unsigned long addr, int numpages) 997int set_memory_4k(unsigned long addr, int numpages)
921{ 998{
922 return change_page_attr_set_clr(addr, numpages, __pgprot(0), 999 return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
923 __pgprot(0), 1); 1000 __pgprot(0), 1, 0);
924} 1001}
925 1002
926int set_pages_uc(struct page *page, int numpages) 1003int set_pages_uc(struct page *page, int numpages)
@@ -973,22 +1050,38 @@ int set_pages_rw(struct page *page, int numpages)
973 1050
974static int __set_pages_p(struct page *page, int numpages) 1051static int __set_pages_p(struct page *page, int numpages)
975{ 1052{
976 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), 1053 unsigned long tempaddr = (unsigned long) page_address(page);
1054 struct cpa_data cpa = { .vaddr = &tempaddr,
977 .numpages = numpages, 1055 .numpages = numpages,
978 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), 1056 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
979 .mask_clr = __pgprot(0)}; 1057 .mask_clr = __pgprot(0),
1058 .flags = 0};
980 1059
981 return __change_page_attr_set_clr(&cpa, 1); 1060 /*
1061 * No alias checking needed for setting present flag. otherwise,
1062 * we may need to break large pages for 64-bit kernel text
1063 * mappings (this adds to complexity if we want to do this from
1064 * atomic context especially). Let's keep it simple!
1065 */
1066 return __change_page_attr_set_clr(&cpa, 0);
982} 1067}
983 1068
984static int __set_pages_np(struct page *page, int numpages) 1069static int __set_pages_np(struct page *page, int numpages)
985{ 1070{
986 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), 1071 unsigned long tempaddr = (unsigned long) page_address(page);
1072 struct cpa_data cpa = { .vaddr = &tempaddr,
987 .numpages = numpages, 1073 .numpages = numpages,
988 .mask_set = __pgprot(0), 1074 .mask_set = __pgprot(0),
989 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; 1075 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
1076 .flags = 0};
990 1077
991 return __change_page_attr_set_clr(&cpa, 1); 1078 /*
1079 * No alias checking needed for setting not present flag. otherwise,
1080 * we may need to break large pages for 64-bit kernel text
1081 * mappings (this adds to complexity if we want to do this from
1082 * atomic context especially). Let's keep it simple!
1083 */
1084 return __change_page_attr_set_clr(&cpa, 0);
992} 1085}
993 1086
994void kernel_map_pages(struct page *page, int numpages, int enable) 1087void kernel_map_pages(struct page *page, int numpages, int enable)
@@ -1008,11 +1101,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1008 1101
1009 /* 1102 /*
1010 * The return value is ignored as the calls cannot fail. 1103 * The return value is ignored as the calls cannot fail.
1011 * Large pages are kept enabled at boot time, and are 1104 * Large pages for identity mappings are not used at boot time
1012 * split up quickly with DEBUG_PAGEALLOC. If a splitup 1105 * and hence no memory allocations during large page split.
1013 * fails here (due to temporary memory shortage) no damage
1014 * is done because we just keep the largepage intact up
1015 * to the next attempt when it will likely be split up:
1016 */ 1106 */
1017 if (enable) 1107 if (enable)
1018 __set_pages_p(page, numpages); 1108 __set_pages_p(page, numpages);
@@ -1024,53 +1114,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1024 * but that can deadlock->flush only current cpu: 1114 * but that can deadlock->flush only current cpu:
1025 */ 1115 */
1026 __flush_tlb_all(); 1116 __flush_tlb_all();
1027
1028 /*
1029 * Try to refill the page pool here. We can do this only after
1030 * the tlb flush.
1031 */
1032 cpa_fill_pool(NULL);
1033} 1117}
1034 1118
1035#ifdef CONFIG_DEBUG_FS
1036static int dpa_show(struct seq_file *m, void *v)
1037{
1038 seq_puts(m, "DEBUG_PAGEALLOC\n");
1039 seq_printf(m, "pool_size : %lu\n", pool_size);
1040 seq_printf(m, "pool_pages : %lu\n", pool_pages);
1041 seq_printf(m, "pool_low : %lu\n", pool_low);
1042 seq_printf(m, "pool_used : %lu\n", pool_used);
1043 seq_printf(m, "pool_failed : %lu\n", pool_failed);
1044
1045 return 0;
1046}
1047
1048static int dpa_open(struct inode *inode, struct file *filp)
1049{
1050 return single_open(filp, dpa_show, NULL);
1051}
1052
1053static const struct file_operations dpa_fops = {
1054 .open = dpa_open,
1055 .read = seq_read,
1056 .llseek = seq_lseek,
1057 .release = single_release,
1058};
1059
1060static int __init debug_pagealloc_proc_init(void)
1061{
1062 struct dentry *de;
1063
1064 de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
1065 &dpa_fops);
1066 if (!de)
1067 return -ENOMEM;
1068
1069 return 0;
1070}
1071__initcall(debug_pagealloc_proc_init);
1072#endif
1073
1074#ifdef CONFIG_HIBERNATION 1119#ifdef CONFIG_HIBERNATION
1075 1120
1076bool kernel_page_present(struct page *page) 1121bool kernel_page_present(struct page *page)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2a50e0fa64a5..738fd0f24958 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -7,24 +7,24 @@
7 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. 7 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
8 */ 8 */
9 9
10#include <linux/mm.h> 10#include <linux/seq_file.h>
11#include <linux/bootmem.h>
12#include <linux/debugfs.h>
11#include <linux/kernel.h> 13#include <linux/kernel.h>
12#include <linux/gfp.h> 14#include <linux/gfp.h>
15#include <linux/mm.h>
13#include <linux/fs.h> 16#include <linux/fs.h>
14#include <linux/bootmem.h>
15#include <linux/debugfs.h>
16#include <linux/seq_file.h>
17 17
18#include <asm/msr.h> 18#include <asm/cacheflush.h>
19#include <asm/tlbflush.h>
20#include <asm/processor.h> 19#include <asm/processor.h>
21#include <asm/page.h> 20#include <asm/tlbflush.h>
22#include <asm/pgtable.h> 21#include <asm/pgtable.h>
23#include <asm/pat.h>
24#include <asm/e820.h>
25#include <asm/cacheflush.h>
26#include <asm/fcntl.h> 22#include <asm/fcntl.h>
23#include <asm/e820.h>
27#include <asm/mtrr.h> 24#include <asm/mtrr.h>
25#include <asm/page.h>
26#include <asm/msr.h>
27#include <asm/pat.h>
28#include <asm/io.h> 28#include <asm/io.h>
29 29
30#ifdef CONFIG_X86_PAT 30#ifdef CONFIG_X86_PAT
@@ -46,6 +46,7 @@ early_param("nopat", nopat);
46 46
47 47
48static int debug_enable; 48static int debug_enable;
49
49static int __init pat_debug_setup(char *str) 50static int __init pat_debug_setup(char *str)
50{ 51{
51 debug_enable = 1; 52 debug_enable = 1;
@@ -145,14 +146,14 @@ static char *cattr_name(unsigned long flags)
145 */ 146 */
146 147
147struct memtype { 148struct memtype {
148 u64 start; 149 u64 start;
149 u64 end; 150 u64 end;
150 unsigned long type; 151 unsigned long type;
151 struct list_head nd; 152 struct list_head nd;
152}; 153};
153 154
154static LIST_HEAD(memtype_list); 155static LIST_HEAD(memtype_list);
155static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ 156static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
156 157
157/* 158/*
158 * Does intersection of PAT memory type and MTRR memory type and returns 159 * Does intersection of PAT memory type and MTRR memory type and returns
@@ -180,8 +181,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
180 return req_type; 181 return req_type;
181} 182}
182 183
183static int chk_conflict(struct memtype *new, struct memtype *entry, 184static int
184 unsigned long *type) 185chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
185{ 186{
186 if (new->type != entry->type) { 187 if (new->type != entry->type) {
187 if (type) { 188 if (type) {
@@ -211,6 +212,66 @@ static struct memtype *cached_entry;
211static u64 cached_start; 212static u64 cached_start;
212 213
213/* 214/*
215 * For RAM pages, mark the pages as non WB memory type using
216 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
217 * set_memory_wc() on a RAM page at a time before marking it as WB again.
218 * This is ok, because only one driver will be owning the page and
219 * doing set_memory_*() calls.
220 *
221 * For now, we use PageNonWB to track that the RAM page is being mapped
222 * as non WB. In future, we will have to use one more flag
223 * (or some other mechanism in page_struct) to distinguish between
224 * UC and WC mapping.
225 */
226static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
227 unsigned long *new_type)
228{
229 struct page *page;
230 u64 pfn, end_pfn;
231
232 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
233 page = pfn_to_page(pfn);
234 if (page_mapped(page) || PageNonWB(page))
235 goto out;
236
237 SetPageNonWB(page);
238 }
239 return 0;
240
241out:
242 end_pfn = pfn;
243 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
244 page = pfn_to_page(pfn);
245 ClearPageNonWB(page);
246 }
247
248 return -EINVAL;
249}
250
251static int free_ram_pages_type(u64 start, u64 end)
252{
253 struct page *page;
254 u64 pfn, end_pfn;
255
256 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
257 page = pfn_to_page(pfn);
258 if (page_mapped(page) || !PageNonWB(page))
259 goto out;
260
261 ClearPageNonWB(page);
262 }
263 return 0;
264
265out:
266 end_pfn = pfn;
267 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
268 page = pfn_to_page(pfn);
269 SetPageNonWB(page);
270 }
271 return -EINVAL;
272}
273
274/*
214 * req_type typically has one of the: 275 * req_type typically has one of the:
215 * - _PAGE_CACHE_WB 276 * - _PAGE_CACHE_WB
216 * - _PAGE_CACHE_WC 277 * - _PAGE_CACHE_WC
@@ -226,14 +287,15 @@ static u64 cached_start;
226 * it will return a negative return value. 287 * it will return a negative return value.
227 */ 288 */
228int reserve_memtype(u64 start, u64 end, unsigned long req_type, 289int reserve_memtype(u64 start, u64 end, unsigned long req_type,
229 unsigned long *new_type) 290 unsigned long *new_type)
230{ 291{
231 struct memtype *new, *entry; 292 struct memtype *new, *entry;
232 unsigned long actual_type; 293 unsigned long actual_type;
233 struct list_head *where; 294 struct list_head *where;
295 int is_range_ram;
234 int err = 0; 296 int err = 0;
235 297
236 BUG_ON(start >= end); /* end is exclusive */ 298 BUG_ON(start >= end); /* end is exclusive */
237 299
238 if (!pat_enabled) { 300 if (!pat_enabled) {
239 /* This is identical to page table setting without PAT */ 301 /* This is identical to page table setting without PAT */
@@ -266,17 +328,24 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
266 actual_type = _PAGE_CACHE_WB; 328 actual_type = _PAGE_CACHE_WB;
267 else 329 else
268 actual_type = _PAGE_CACHE_UC_MINUS; 330 actual_type = _PAGE_CACHE_UC_MINUS;
269 } else 331 } else {
270 actual_type = pat_x_mtrr_type(start, end, 332 actual_type = pat_x_mtrr_type(start, end,
271 req_type & _PAGE_CACHE_MASK); 333 req_type & _PAGE_CACHE_MASK);
334 }
335
336 is_range_ram = pagerange_is_ram(start, end);
337 if (is_range_ram == 1)
338 return reserve_ram_pages_type(start, end, req_type, new_type);
339 else if (is_range_ram < 0)
340 return -EINVAL;
272 341
273 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 342 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
274 if (!new) 343 if (!new)
275 return -ENOMEM; 344 return -ENOMEM;
276 345
277 new->start = start; 346 new->start = start;
278 new->end = end; 347 new->end = end;
279 new->type = actual_type; 348 new->type = actual_type;
280 349
281 if (new_type) 350 if (new_type)
282 *new_type = actual_type; 351 *new_type = actual_type;
@@ -335,6 +404,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
335 start, end, cattr_name(new->type), cattr_name(req_type)); 404 start, end, cattr_name(new->type), cattr_name(req_type));
336 kfree(new); 405 kfree(new);
337 spin_unlock(&memtype_lock); 406 spin_unlock(&memtype_lock);
407
338 return err; 408 return err;
339 } 409 }
340 410
@@ -358,6 +428,7 @@ int free_memtype(u64 start, u64 end)
358{ 428{
359 struct memtype *entry; 429 struct memtype *entry;
360 int err = -EINVAL; 430 int err = -EINVAL;
431 int is_range_ram;
361 432
362 if (!pat_enabled) 433 if (!pat_enabled)
363 return 0; 434 return 0;
@@ -366,6 +437,12 @@ int free_memtype(u64 start, u64 end)
366 if (is_ISA_range(start, end - 1)) 437 if (is_ISA_range(start, end - 1))
367 return 0; 438 return 0;
368 439
440 is_range_ram = pagerange_is_ram(start, end);
441 if (is_range_ram == 1)
442 return free_ram_pages_type(start, end);
443 else if (is_range_ram < 0)
444 return -EINVAL;
445
369 spin_lock(&memtype_lock); 446 spin_lock(&memtype_lock);
370 list_for_each_entry(entry, &memtype_list, nd) { 447 list_for_each_entry(entry, &memtype_list, nd) {
371 if (entry->start == start && entry->end == end) { 448 if (entry->start == start && entry->end == end) {
@@ -386,6 +463,7 @@ int free_memtype(u64 start, u64 end)
386 } 463 }
387 464
388 dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); 465 dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
466
389 return err; 467 return err;
390} 468}
391 469
@@ -492,9 +570,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
492 570
493void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) 571void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
494{ 572{
573 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
495 u64 addr = (u64)pfn << PAGE_SHIFT; 574 u64 addr = (u64)pfn << PAGE_SHIFT;
496 unsigned long flags; 575 unsigned long flags;
497 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
498 576
499 reserve_memtype(addr, addr + size, want_flags, &flags); 577 reserve_memtype(addr, addr + size, want_flags, &flags);
500 if (flags != want_flags) { 578 if (flags != want_flags) {
@@ -514,7 +592,7 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
514 free_memtype(addr, addr + size); 592 free_memtype(addr, addr + size);
515} 593}
516 594
517#if defined(CONFIG_DEBUG_FS) 595#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
518 596
519/* get Nth element of the linked list */ 597/* get Nth element of the linked list */
520static struct memtype *memtype_get_idx(loff_t pos) 598static struct memtype *memtype_get_idx(loff_t pos)
@@ -537,6 +615,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
537 } 615 }
538 spin_unlock(&memtype_lock); 616 spin_unlock(&memtype_lock);
539 kfree(print_entry); 617 kfree(print_entry);
618
540 return NULL; 619 return NULL;
541} 620}
542 621
@@ -567,6 +646,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
567 seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), 646 seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
568 print_entry->start, print_entry->end); 647 print_entry->start, print_entry->end);
569 kfree(print_entry); 648 kfree(print_entry);
649
570 return 0; 650 return 0;
571} 651}
572 652
@@ -598,4 +678,4 @@ static int __init pat_memtype_list_init(void)
598 678
599late_initcall(pat_memtype_list_init); 679late_initcall(pat_memtype_list_init);
600 680
601#endif /* CONFIG_DEBUG_FS */ 681#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d50302774fe2..86f2ffc43c3d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -63,10 +63,8 @@ static inline void pgd_list_del(pgd_t *pgd)
63#define UNSHARED_PTRS_PER_PGD \ 63#define UNSHARED_PTRS_PER_PGD \
64 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) 64 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
65 65
66static void pgd_ctor(void *p) 66static void pgd_ctor(pgd_t *pgd)
67{ 67{
68 pgd_t *pgd = p;
69
70 /* If the pgd points to a shared pagetable level (either the 68 /* If the pgd points to a shared pagetable level (either the
71 ptes in non-PAE, or shared PMD in PAE), then just copy the 69 ptes in non-PAE, or shared PMD in PAE), then just copy the
72 references from swapper_pg_dir. */ 70 references from swapper_pg_dir. */
@@ -87,7 +85,7 @@ static void pgd_ctor(void *p)
87 pgd_list_add(pgd); 85 pgd_list_add(pgd);
88} 86}
89 87
90static void pgd_dtor(void *pgd) 88static void pgd_dtor(pgd_t *pgd)
91{ 89{
92 unsigned long flags; /* can be called from interrupt context */ 90 unsigned long flags; /* can be called from interrupt context */
93 91
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cab0abbd1ebe..0951db9ee519 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -123,7 +123,8 @@ static int __init parse_vmalloc(char *arg)
123 if (!arg) 123 if (!arg)
124 return -EINVAL; 124 return -EINVAL;
125 125
126 __VMALLOC_RESERVE = memparse(arg, &arg); 126 /* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/
127 __VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET;
127 return 0; 128 return 0;
128} 129}
129early_param("vmalloc", parse_vmalloc); 130early_param("vmalloc", parse_vmalloc);
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0227694f7dab..8a5f1614a3d5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -295,10 +295,12 @@ static void nmi_cpu_shutdown(void *dummy)
295 295
296static void nmi_shutdown(void) 296static void nmi_shutdown(void)
297{ 297{
298 struct op_msrs *msrs = &get_cpu_var(cpu_msrs); 298 struct op_msrs *msrs;
299
299 nmi_enabled = 0; 300 nmi_enabled = 0;
300 on_each_cpu(nmi_cpu_shutdown, NULL, 1); 301 on_each_cpu(nmi_cpu_shutdown, NULL, 1);
301 unregister_die_notifier(&profile_exceptions_nb); 302 unregister_die_notifier(&profile_exceptions_nb);
303 msrs = &get_cpu_var(cpu_msrs);
302 model->shutdown(msrs); 304 model->shutdown(msrs);
303 free_msrs(); 305 free_msrs();
304 put_cpu_var(cpu_msrs); 306 put_cpu_var(cpu_msrs);
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 56b4757a1f47..43ac5af338d8 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -10,11 +10,12 @@
10 10
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/ptrace.h>
14#include <linux/nmi.h>
13#include <asm/msr.h> 15#include <asm/msr.h>
14#include <asm/ptrace.h>
15#include <asm/fixmap.h> 16#include <asm/fixmap.h>
16#include <asm/apic.h> 17#include <asm/apic.h>
17#include <asm/nmi.h> 18
18 19
19#include "op_x86_model.h" 20#include "op_x86_model.h"
20#include "op_counter.h" 21#include "op_counter.h"
@@ -40,7 +41,7 @@ static unsigned int num_controls = NUM_CONTROLS_NON_HT;
40static inline void setup_num_counters(void) 41static inline void setup_num_counters(void)
41{ 42{
42#ifdef CONFIG_SMP 43#ifdef CONFIG_SMP
43 if (smp_num_siblings == 2){ 44 if (smp_num_siblings == 2) {
44 num_counters = NUM_COUNTERS_HT2; 45 num_counters = NUM_COUNTERS_HT2;
45 num_controls = NUM_CONTROLS_HT2; 46 num_controls = NUM_CONTROLS_HT2;
46 } 47 }
@@ -86,7 +87,7 @@ struct p4_event_binding {
86#define CTR_FLAME_2 (1 << 6) 87#define CTR_FLAME_2 (1 << 6)
87#define CTR_IQ_5 (1 << 7) 88#define CTR_IQ_5 (1 << 7)
88 89
89static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { 90static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
90 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, 91 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
91 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, 92 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
92 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, 93 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
@@ -97,32 +98,32 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
97 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } 98 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
98}; 99};
99 100
100#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT 101#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
101 102
102/* p4 event codes in libop/op_event.h are indices into this table. */ 103/* p4 event codes in libop/op_event.h are indices into this table. */
103 104
104static struct p4_event_binding p4_events[NUM_EVENTS] = { 105static struct p4_event_binding p4_events[NUM_EVENTS] = {
105 106
106 { /* BRANCH_RETIRED */ 107 { /* BRANCH_RETIRED */
107 0x05, 0x06, 108 0x05, 0x06,
108 { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, 109 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
109 {CTR_IQ_5, MSR_P4_CRU_ESCR3} } 110 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
110 }, 111 },
111 112
112 { /* MISPRED_BRANCH_RETIRED */ 113 { /* MISPRED_BRANCH_RETIRED */
113 0x04, 0x03, 114 0x04, 0x03,
114 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 115 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
115 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 116 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
116 }, 117 },
117 118
118 { /* TC_DELIVER_MODE */ 119 { /* TC_DELIVER_MODE */
119 0x01, 0x01, 120 0x01, 0x01,
120 { { CTR_MS_0, MSR_P4_TC_ESCR0}, 121 { { CTR_MS_0, MSR_P4_TC_ESCR0},
121 { CTR_MS_2, MSR_P4_TC_ESCR1} } 122 { CTR_MS_2, MSR_P4_TC_ESCR1} }
122 }, 123 },
123 124
124 { /* BPU_FETCH_REQUEST */ 125 { /* BPU_FETCH_REQUEST */
125 0x00, 0x03, 126 0x00, 0x03,
126 { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, 127 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
127 { CTR_BPU_2, MSR_P4_BPU_ESCR1} } 128 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
128 }, 129 },
@@ -146,7 +147,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
146 }, 147 },
147 148
148 { /* LOAD_PORT_REPLAY */ 149 { /* LOAD_PORT_REPLAY */
149 0x02, 0x04, 150 0x02, 0x04,
150 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 151 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
151 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 152 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
152 }, 153 },
@@ -170,43 +171,43 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
170 }, 171 },
171 172
172 { /* BSQ_CACHE_REFERENCE */ 173 { /* BSQ_CACHE_REFERENCE */
173 0x07, 0x0c, 174 0x07, 0x0c,
174 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 175 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
175 { CTR_BPU_2, MSR_P4_BSU_ESCR1} } 176 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
176 }, 177 },
177 178
178 { /* IOQ_ALLOCATION */ 179 { /* IOQ_ALLOCATION */
179 0x06, 0x03, 180 0x06, 0x03,
180 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 181 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
181 { 0, 0 } } 182 { 0, 0 } }
182 }, 183 },
183 184
184 { /* IOQ_ACTIVE_ENTRIES */ 185 { /* IOQ_ACTIVE_ENTRIES */
185 0x06, 0x1a, 186 0x06, 0x1a,
186 { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, 187 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
187 { 0, 0 } } 188 { 0, 0 } }
188 }, 189 },
189 190
190 { /* FSB_DATA_ACTIVITY */ 191 { /* FSB_DATA_ACTIVITY */
191 0x06, 0x17, 192 0x06, 0x17,
192 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 193 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
193 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 194 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
194 }, 195 },
195 196
196 { /* BSQ_ALLOCATION */ 197 { /* BSQ_ALLOCATION */
197 0x07, 0x05, 198 0x07, 0x05,
198 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 199 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
199 { 0, 0 } } 200 { 0, 0 } }
200 }, 201 },
201 202
202 { /* BSQ_ACTIVE_ENTRIES */ 203 { /* BSQ_ACTIVE_ENTRIES */
203 0x07, 0x06, 204 0x07, 0x06,
204 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, 205 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
205 { 0, 0 } } 206 { 0, 0 } }
206 }, 207 },
207 208
208 { /* X87_ASSIST */ 209 { /* X87_ASSIST */
209 0x05, 0x03, 210 0x05, 0x03,
210 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 211 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
211 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 212 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
212 }, 213 },
@@ -216,21 +217,21 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
216 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 217 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
217 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 218 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
218 }, 219 },
219 220
220 { /* PACKED_SP_UOP */ 221 { /* PACKED_SP_UOP */
221 0x01, 0x08, 222 0x01, 0x08,
222 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 223 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
223 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 224 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
224 }, 225 },
225 226
226 { /* PACKED_DP_UOP */ 227 { /* PACKED_DP_UOP */
227 0x01, 0x0c, 228 0x01, 0x0c,
228 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 229 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
229 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 230 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
230 }, 231 },
231 232
232 { /* SCALAR_SP_UOP */ 233 { /* SCALAR_SP_UOP */
233 0x01, 0x0a, 234 0x01, 0x0a,
234 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 235 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
235 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 236 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
236 }, 237 },
@@ -242,31 +243,31 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
242 }, 243 },
243 244
244 { /* 64BIT_MMX_UOP */ 245 { /* 64BIT_MMX_UOP */
245 0x01, 0x02, 246 0x01, 0x02,
246 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 247 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
247 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 248 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
248 }, 249 },
249 250
250 { /* 128BIT_MMX_UOP */ 251 { /* 128BIT_MMX_UOP */
251 0x01, 0x1a, 252 0x01, 0x1a,
252 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 253 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
253 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 254 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
254 }, 255 },
255 256
256 { /* X87_FP_UOP */ 257 { /* X87_FP_UOP */
257 0x01, 0x04, 258 0x01, 0x04,
258 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 259 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
259 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 260 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
260 }, 261 },
261 262
262 { /* X87_SIMD_MOVES_UOP */ 263 { /* X87_SIMD_MOVES_UOP */
263 0x01, 0x2e, 264 0x01, 0x2e,
264 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 265 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
265 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 266 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
266 }, 267 },
267 268
268 { /* MACHINE_CLEAR */ 269 { /* MACHINE_CLEAR */
269 0x05, 0x02, 270 0x05, 0x02,
270 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 271 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
271 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 272 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
272 }, 273 },
@@ -276,9 +277,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
276 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 277 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
277 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 278 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
278 }, 279 },
279 280
280 { /* TC_MS_XFER */ 281 { /* TC_MS_XFER */
281 0x00, 0x05, 282 0x00, 0x05,
282 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 283 { { CTR_MS_0, MSR_P4_MS_ESCR0},
283 { CTR_MS_2, MSR_P4_MS_ESCR1} } 284 { CTR_MS_2, MSR_P4_MS_ESCR1} }
284 }, 285 },
@@ -308,7 +309,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
308 }, 309 },
309 310
310 { /* INSTR_RETIRED */ 311 { /* INSTR_RETIRED */
311 0x04, 0x02, 312 0x04, 0x02,
312 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 313 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
313 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 314 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
314 }, 315 },
@@ -319,14 +320,14 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
319 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 320 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
320 }, 321 },
321 322
322 { /* UOP_TYPE */ 323 { /* UOP_TYPE */
323 0x02, 0x02, 324 0x02, 0x02,
324 { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, 325 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
325 { CTR_IQ_5, MSR_P4_RAT_ESCR1} } 326 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
326 }, 327 },
327 328
328 { /* RETIRED_MISPRED_BRANCH_TYPE */ 329 { /* RETIRED_MISPRED_BRANCH_TYPE */
329 0x02, 0x05, 330 0x02, 0x05,
330 { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, 331 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
331 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 332 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
332 }, 333 },
@@ -349,8 +350,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
349#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) 350#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
350#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) 351#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
351#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) 352#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
352#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 353#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
353#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 354#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
354 355
355#define CCCR_RESERVED_BITS 0x38030FFF 356#define CCCR_RESERVED_BITS 0x38030FFF
356#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) 357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -360,15 +361,15 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
360#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) 361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
361#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) 362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
362#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) 363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
363#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 364#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
364#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 365#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
365#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) 366#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
366#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) 367#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
367 368
368#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) 369#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
369#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) 370#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
370#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) 371#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
371#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) 372#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
372#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) 373#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
373 374
374 375
@@ -380,7 +381,7 @@ static unsigned int get_stagger(void)
380#ifdef CONFIG_SMP 381#ifdef CONFIG_SMP
381 int cpu = smp_processor_id(); 382 int cpu = smp_processor_id();
382 return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); 383 return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
383#endif 384#endif
384 return 0; 385 return 0;
385} 386}
386 387
@@ -395,25 +396,23 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
395 396
396static void p4_fill_in_addresses(struct op_msrs * const msrs) 397static void p4_fill_in_addresses(struct op_msrs * const msrs)
397{ 398{
398 unsigned int i; 399 unsigned int i;
399 unsigned int addr, cccraddr, stag; 400 unsigned int addr, cccraddr, stag;
400 401
401 setup_num_counters(); 402 setup_num_counters();
402 stag = get_stagger(); 403 stag = get_stagger();
403 404
404 /* initialize some registers */ 405 /* initialize some registers */
405 for (i = 0; i < num_counters; ++i) { 406 for (i = 0; i < num_counters; ++i)
406 msrs->counters[i].addr = 0; 407 msrs->counters[i].addr = 0;
407 } 408 for (i = 0; i < num_controls; ++i)
408 for (i = 0; i < num_controls; ++i) {
409 msrs->controls[i].addr = 0; 409 msrs->controls[i].addr = 0;
410 } 410
411
412 /* the counter & cccr registers we pay attention to */ 411 /* the counter & cccr registers we pay attention to */
413 for (i = 0; i < num_counters; ++i) { 412 for (i = 0; i < num_counters; ++i) {
414 addr = p4_counters[VIRT_CTR(stag, i)].counter_address; 413 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
415 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; 414 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
416 if (reserve_perfctr_nmi(addr)){ 415 if (reserve_perfctr_nmi(addr)) {
417 msrs->counters[i].addr = addr; 416 msrs->counters[i].addr = addr;
418 msrs->controls[i].addr = cccraddr; 417 msrs->controls[i].addr = cccraddr;
419 } 418 }
@@ -447,22 +446,22 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
447 if (reserve_evntsel_nmi(addr)) 446 if (reserve_evntsel_nmi(addr))
448 msrs->controls[i].addr = addr; 447 msrs->controls[i].addr = addr;
449 } 448 }
450 449
451 for (addr = MSR_P4_MS_ESCR0 + stag; 450 for (addr = MSR_P4_MS_ESCR0 + stag;
452 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 451 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
453 if (reserve_evntsel_nmi(addr)) 452 if (reserve_evntsel_nmi(addr))
454 msrs->controls[i].addr = addr; 453 msrs->controls[i].addr = addr;
455 } 454 }
456 455
457 for (addr = MSR_P4_IX_ESCR0 + stag; 456 for (addr = MSR_P4_IX_ESCR0 + stag;
458 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 457 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
459 if (reserve_evntsel_nmi(addr)) 458 if (reserve_evntsel_nmi(addr))
460 msrs->controls[i].addr = addr; 459 msrs->controls[i].addr = addr;
461 } 460 }
462 461
463 /* there are 2 remaining non-contiguously located ESCRs */ 462 /* there are 2 remaining non-contiguously located ESCRs */
464 463
465 if (num_counters == NUM_COUNTERS_NON_HT) { 464 if (num_counters == NUM_COUNTERS_NON_HT) {
466 /* standard non-HT CPUs handle both remaining ESCRs*/ 465 /* standard non-HT CPUs handle both remaining ESCRs*/
467 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) 466 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
@@ -498,20 +497,20 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
498 unsigned int stag; 497 unsigned int stag;
499 498
500 stag = get_stagger(); 499 stag = get_stagger();
501 500
502 /* convert from counter *number* to counter *bit* */ 501 /* convert from counter *number* to counter *bit* */
503 counter_bit = 1 << VIRT_CTR(stag, ctr); 502 counter_bit = 1 << VIRT_CTR(stag, ctr);
504 503
505 /* find our event binding structure. */ 504 /* find our event binding structure. */
506 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { 505 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
507 printk(KERN_ERR 506 printk(KERN_ERR
508 "oprofile: P4 event code 0x%lx out of range\n", 507 "oprofile: P4 event code 0x%lx out of range\n",
509 counter_config[ctr].event); 508 counter_config[ctr].event);
510 return; 509 return;
511 } 510 }
512 511
513 ev = &(p4_events[counter_config[ctr].event - 1]); 512 ev = &(p4_events[counter_config[ctr].event - 1]);
514 513
515 for (i = 0; i < maxbind; i++) { 514 for (i = 0; i < maxbind; i++) {
516 if (ev->bindings[i].virt_counter & counter_bit) { 515 if (ev->bindings[i].virt_counter & counter_bit) {
517 516
@@ -526,25 +525,24 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
526 ESCR_SET_OS_1(escr, counter_config[ctr].kernel); 525 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
527 } 526 }
528 ESCR_SET_EVENT_SELECT(escr, ev->event_select); 527 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
529 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); 528 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
530 ESCR_WRITE(escr, high, ev, i); 529 ESCR_WRITE(escr, high, ev, i);
531 530
532 /* modify CCCR */ 531 /* modify CCCR */
533 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); 532 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
534 CCCR_CLEAR(cccr); 533 CCCR_CLEAR(cccr);
535 CCCR_SET_REQUIRED_BITS(cccr); 534 CCCR_SET_REQUIRED_BITS(cccr);
536 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); 535 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
537 if (stag == 0) { 536 if (stag == 0)
538 CCCR_SET_PMI_OVF_0(cccr); 537 CCCR_SET_PMI_OVF_0(cccr);
539 } else { 538 else
540 CCCR_SET_PMI_OVF_1(cccr); 539 CCCR_SET_PMI_OVF_1(cccr);
541 }
542 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); 540 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
543 return; 541 return;
544 } 542 }
545 } 543 }
546 544
547 printk(KERN_ERR 545 printk(KERN_ERR
548 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", 546 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
549 counter_config[ctr].event, stag, ctr); 547 counter_config[ctr].event, stag, ctr);
550} 548}
@@ -559,14 +557,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
559 stag = get_stagger(); 557 stag = get_stagger();
560 558
561 rdmsr(MSR_IA32_MISC_ENABLE, low, high); 559 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
562 if (! MISC_PMC_ENABLED_P(low)) { 560 if (!MISC_PMC_ENABLED_P(low)) {
563 printk(KERN_ERR "oprofile: P4 PMC not available\n"); 561 printk(KERN_ERR "oprofile: P4 PMC not available\n");
564 return; 562 return;
565 } 563 }
566 564
567 /* clear the cccrs we will use */ 565 /* clear the cccrs we will use */
568 for (i = 0 ; i < num_counters ; i++) { 566 for (i = 0 ; i < num_counters ; i++) {
569 if (unlikely(!CTRL_IS_RESERVED(msrs,i))) 567 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
570 continue; 568 continue;
571 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 569 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
572 CCCR_CLEAR(low); 570 CCCR_CLEAR(low);
@@ -576,14 +574,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
576 574
577 /* clear all escrs (including those outside our concern) */ 575 /* clear all escrs (including those outside our concern) */
578 for (i = num_counters; i < num_controls; i++) { 576 for (i = num_counters; i < num_controls; i++) {
579 if (unlikely(!CTRL_IS_RESERVED(msrs,i))) 577 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
580 continue; 578 continue;
581 wrmsr(msrs->controls[i].addr, 0, 0); 579 wrmsr(msrs->controls[i].addr, 0, 0);
582 } 580 }
583 581
584 /* setup all counters */ 582 /* setup all counters */
585 for (i = 0 ; i < num_counters ; ++i) { 583 for (i = 0 ; i < num_counters ; ++i) {
586 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { 584 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
587 reset_value[i] = counter_config[i].count; 585 reset_value[i] = counter_config[i].count;
588 pmc_setup_one_p4_counter(i); 586 pmc_setup_one_p4_counter(i);
589 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); 587 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -603,11 +601,11 @@ static int p4_check_ctrs(struct pt_regs * const regs,
603 stag = get_stagger(); 601 stag = get_stagger();
604 602
605 for (i = 0; i < num_counters; ++i) { 603 for (i = 0; i < num_counters; ++i) {
606 604
607 if (!reset_value[i]) 605 if (!reset_value[i])
608 continue; 606 continue;
609 607
610 /* 608 /*
611 * there is some eccentricity in the hardware which 609 * there is some eccentricity in the hardware which
612 * requires that we perform 2 extra corrections: 610 * requires that we perform 2 extra corrections:
613 * 611 *
@@ -616,24 +614,24 @@ static int p4_check_ctrs(struct pt_regs * const regs,
616 * 614 *
617 * - write the counter back twice to ensure it gets 615 * - write the counter back twice to ensure it gets
618 * updated properly. 616 * updated properly.
619 * 617 *
620 * the former seems to be related to extra NMIs happening 618 * the former seems to be related to extra NMIs happening
621 * during the current NMI; the latter is reported as errata 619 * during the current NMI; the latter is reported as errata
622 * N15 in intel doc 249199-029, pentium 4 specification 620 * N15 in intel doc 249199-029, pentium 4 specification
623 * update, though their suggested work-around does not 621 * update, though their suggested work-around does not
624 * appear to solve the problem. 622 * appear to solve the problem.
625 */ 623 */
626 624
627 real = VIRT_CTR(stag, i); 625 real = VIRT_CTR(stag, i);
628 626
629 CCCR_READ(low, high, real); 627 CCCR_READ(low, high, real);
630 CTR_READ(ctr, high, real); 628 CTR_READ(ctr, high, real);
631 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { 629 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
632 oprofile_add_sample(regs, i); 630 oprofile_add_sample(regs, i);
633 CTR_WRITE(reset_value[i], real); 631 CTR_WRITE(reset_value[i], real);
634 CCCR_CLEAR_OVF(low); 632 CCCR_CLEAR_OVF(low);
635 CCCR_WRITE(low, high, real); 633 CCCR_WRITE(low, high, real);
636 CTR_WRITE(reset_value[i], real); 634 CTR_WRITE(reset_value[i], real);
637 } 635 }
638 } 636 }
639 637
@@ -683,15 +681,16 @@ static void p4_shutdown(struct op_msrs const * const msrs)
683 int i; 681 int i;
684 682
685 for (i = 0 ; i < num_counters ; ++i) { 683 for (i = 0 ; i < num_counters ; ++i) {
686 if (CTR_IS_RESERVED(msrs,i)) 684 if (CTR_IS_RESERVED(msrs, i))
687 release_perfctr_nmi(msrs->counters[i].addr); 685 release_perfctr_nmi(msrs->counters[i].addr);
688 } 686 }
689 /* some of the control registers are specially reserved in 687 /*
688 * some of the control registers are specially reserved in
690 * conjunction with the counter registers (hence the starting offset). 689 * conjunction with the counter registers (hence the starting offset).
691 * This saves a few bits. 690 * This saves a few bits.
692 */ 691 */
693 for (i = num_counters ; i < num_controls ; ++i) { 692 for (i = num_counters ; i < num_controls ; ++i) {
694 if (CTRL_IS_RESERVED(msrs,i)) 693 if (CTRL_IS_RESERVED(msrs, i))
695 release_evntsel_nmi(msrs->controls[i].addr); 694 release_evntsel_nmi(msrs->controls[i].addr);
696 } 695 }
697} 696}
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 6a0fca78c362..22e057665e55 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -580,7 +580,7 @@ static int __cpuinit amd_cpu_notify(struct notifier_block *self,
580 unsigned long action, void *hcpu) 580 unsigned long action, void *hcpu)
581{ 581{
582 int cpu = (long)hcpu; 582 int cpu = (long)hcpu;
583 switch(action) { 583 switch (action) {
584 case CPU_ONLINE: 584 case CPU_ONLINE:
585 case CPU_ONLINE_FROZEN: 585 case CPU_ONLINE_FROZEN:
586 smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0); 586 smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 8e077185e185..006599db0dc7 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1043,35 +1043,44 @@ static void __init pcibios_fixup_irqs(void)
1043 if (io_apic_assign_pci_irqs) { 1043 if (io_apic_assign_pci_irqs) {
1044 int irq; 1044 int irq;
1045 1045
1046 if (pin) { 1046 if (!pin)
1047 /* 1047 continue;
1048 * interrupt pins are numbered starting 1048
1049 * from 1 1049 /*
1050 */ 1050 * interrupt pins are numbered starting from 1
1051 pin--; 1051 */
1052 irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, 1052 pin--;
1053 PCI_SLOT(dev->devfn), pin); 1053 irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
1054 /* 1054 PCI_SLOT(dev->devfn), pin);
1055 * Busses behind bridges are typically not listed in the MP-table. 1055 /*
1056 * In this case we have to look up the IRQ based on the parent bus, 1056 * Busses behind bridges are typically not listed in the
1057 * parent slot, and pin number. The SMP code detects such bridged 1057 * MP-table. In this case we have to look up the IRQ
1058 * busses itself so we should get into this branch reliably. 1058 * based on the parent bus, parent slot, and pin number.
1059 */ 1059 * The SMP code detects such bridged busses itself so we
1060 if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ 1060 * should get into this branch reliably.
1061 struct pci_dev *bridge = dev->bus->self; 1061 */
1062 1062 if (irq < 0 && dev->bus->parent) {
1063 pin = (pin + PCI_SLOT(dev->devfn)) % 4; 1063 /* go back to the bridge */
1064 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 1064 struct pci_dev *bridge = dev->bus->self;
1065 PCI_SLOT(bridge->devfn), pin); 1065 int bus;
1066 if (irq >= 0) 1066
1067 dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", 1067 pin = (pin + PCI_SLOT(dev->devfn)) % 4;
1068 pci_name(bridge), 1068 bus = bridge->bus->number;
1069 'A' + pin, irq); 1069 irq = IO_APIC_get_PCI_irq_vector(bus,
1070 } 1070 PCI_SLOT(bridge->devfn), pin);
1071 if (irq >= 0) { 1071 if (irq >= 0)
1072 dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); 1072 dev_warn(&dev->dev,
1073 dev->irq = irq; 1073 "using bridge %s INT %c to "
1074 } 1074 "get IRQ %d\n",
1075 pci_name(bridge),
1076 'A' + pin, irq);
1077 }
1078 if (irq >= 0) {
1079 dev_info(&dev->dev,
1080 "PCI->APIC IRQ transform: INT %c "
1081 "-> IRQ %d\n",
1082 'A' + pin, irq);
1083 dev->irq = irq;
1075 } 1084 }
1076 } 1085 }
1077#endif 1086#endif
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 4fc7e872c85e..d1e9b53f9d33 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -1,5 +1,3 @@
1.text
2
3/* 1/*
4 * This may not use any stack, nor any variable that is not "NoSave": 2 * This may not use any stack, nor any variable that is not "NoSave":
5 * 3 *
@@ -12,17 +10,18 @@
12#include <asm/segment.h> 10#include <asm/segment.h>
13#include <asm/page.h> 11#include <asm/page.h>
14#include <asm/asm-offsets.h> 12#include <asm/asm-offsets.h>
13#include <asm/processor-flags.h>
15 14
16 .text 15.text
17 16
18ENTRY(swsusp_arch_suspend) 17ENTRY(swsusp_arch_suspend)
19
20 movl %esp, saved_context_esp 18 movl %esp, saved_context_esp
21 movl %ebx, saved_context_ebx 19 movl %ebx, saved_context_ebx
22 movl %ebp, saved_context_ebp 20 movl %ebp, saved_context_ebp
23 movl %esi, saved_context_esi 21 movl %esi, saved_context_esi
24 movl %edi, saved_context_edi 22 movl %edi, saved_context_edi
25 pushfl ; popl saved_context_eflags 23 pushfl
24 popl saved_context_eflags
26 25
27 call swsusp_save 26 call swsusp_save
28 ret 27 ret
@@ -59,7 +58,7 @@ done:
59 movl mmu_cr4_features, %ecx 58 movl mmu_cr4_features, %ecx
60 jecxz 1f # cr4 Pentium and higher, skip if zero 59 jecxz 1f # cr4 Pentium and higher, skip if zero
61 movl %ecx, %edx 60 movl %ecx, %edx
62 andl $~(1<<7), %edx; # PGE 61 andl $~(X86_CR4_PGE), %edx
63 movl %edx, %cr4; # turn off PGE 62 movl %edx, %cr4; # turn off PGE
641: 631:
65 movl %cr3, %eax; # flush TLB 64 movl %cr3, %eax; # flush TLB
@@ -74,7 +73,8 @@ done:
74 movl saved_context_esi, %esi 73 movl saved_context_esi, %esi
75 movl saved_context_edi, %edi 74 movl saved_context_edi, %edi
76 75
77 pushl saved_context_eflags ; popfl 76 pushl saved_context_eflags
77 popfl
78 78
79 xorl %eax, %eax 79 xorl %eax, %eax
80 80
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a4e201b47f64..7dcd321a0508 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -812,7 +812,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
812 812
813/* Early in boot, while setting up the initial pagetable, assume 813/* Early in boot, while setting up the initial pagetable, assume
814 everything is pinned. */ 814 everything is pinned. */
815static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) 815static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
816{ 816{
817#ifdef CONFIG_FLATMEM 817#ifdef CONFIG_FLATMEM
818 BUG_ON(mem_map); /* should only be used early */ 818 BUG_ON(mem_map); /* should only be used early */
@@ -822,7 +822,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
822 822
823/* Early release_pte assumes that all pts are pinned, since there's 823/* Early release_pte assumes that all pts are pinned, since there's
824 only init_mm and anything attached to that is pinned. */ 824 only init_mm and anything attached to that is pinned. */
825static void xen_release_pte_init(u32 pfn) 825static void xen_release_pte_init(unsigned long pfn)
826{ 826{
827 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 827 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
828} 828}
@@ -838,7 +838,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
838 838
839/* This needs to make sure the new pte page is pinned iff its being 839/* This needs to make sure the new pte page is pinned iff its being
840 attached to a pinned pagetable. */ 840 attached to a pinned pagetable. */
841static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) 841static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
842{ 842{
843 struct page *page = pfn_to_page(pfn); 843 struct page *page = pfn_to_page(pfn);
844 844
@@ -856,12 +856,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
856 } 856 }
857} 857}
858 858
859static void xen_alloc_pte(struct mm_struct *mm, u32 pfn) 859static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
860{ 860{
861 xen_alloc_ptpage(mm, pfn, PT_PTE); 861 xen_alloc_ptpage(mm, pfn, PT_PTE);
862} 862}
863 863
864static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) 864static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
865{ 865{
866 xen_alloc_ptpage(mm, pfn, PT_PMD); 866 xen_alloc_ptpage(mm, pfn, PT_PMD);
867} 867}
@@ -909,7 +909,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
909} 909}
910 910
911/* This should never happen until we're OK to use struct page */ 911/* This should never happen until we're OK to use struct page */
912static void xen_release_ptpage(u32 pfn, unsigned level) 912static void xen_release_ptpage(unsigned long pfn, unsigned level)
913{ 913{
914 struct page *page = pfn_to_page(pfn); 914 struct page *page = pfn_to_page(pfn);
915 915
@@ -923,23 +923,23 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
923 } 923 }
924} 924}
925 925
926static void xen_release_pte(u32 pfn) 926static void xen_release_pte(unsigned long pfn)
927{ 927{
928 xen_release_ptpage(pfn, PT_PTE); 928 xen_release_ptpage(pfn, PT_PTE);
929} 929}
930 930
931static void xen_release_pmd(u32 pfn) 931static void xen_release_pmd(unsigned long pfn)
932{ 932{
933 xen_release_ptpage(pfn, PT_PMD); 933 xen_release_ptpage(pfn, PT_PMD);
934} 934}
935 935
936#if PAGETABLE_LEVELS == 4 936#if PAGETABLE_LEVELS == 4
937static void xen_alloc_pud(struct mm_struct *mm, u32 pfn) 937static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
938{ 938{
939 xen_alloc_ptpage(mm, pfn, PT_PUD); 939 xen_alloc_ptpage(mm, pfn, PT_PUD);
940} 940}
941 941
942static void xen_release_pud(u32 pfn) 942static void xen_release_pud(unsigned long pfn)
943{ 943{
944 xen_release_ptpage(pfn, PT_PUD); 944 xen_release_ptpage(pfn, PT_PUD);
945} 945}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b6acc3a0af46..d67901083888 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -42,7 +42,7 @@ char * __init xen_memory_setup(void)
42 42
43 e820.nr_map = 0; 43 e820.nr_map = 0;
44 44
45 e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM); 45 e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
46 46
47 /* 47 /*
48 * Even though this is normal, usable memory under Xen, reserve 48 * Even though this is normal, usable memory under Xen, reserve