Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/kernel/aperture_64.c                |   3
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-smi.c  |  39
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c           |  42
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c              |   3
-rw-r--r--  arch/x86/kernel/e820_32.c                    |  26
-rw-r--r--  arch/x86/kernel/e820_64.c                    |  27
-rw-r--r--  arch/x86/kernel/head_32.S                    |   2
-rw-r--r--  arch/x86/kernel/io_delay.c                   |   8
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c           |   1
-rw-r--r--  arch/x86/kernel/mfgpt_32.c                   |   1
-rw-r--r--  arch/x86/kernel/pci-dma_64.c                 |   5
-rw-r--r--  arch/x86/kernel/ptrace.c                     | 169
-rw-r--r--  arch/x86/kernel/quirks.c                     |   2
-rw-r--r--  arch/x86/kernel/reboot.c                     |  18
-rw-r--r--  arch/x86/kernel/setup64.c                    |  14
-rw-r--r--  arch/x86/kernel/setup_32.c                   |   9
-rw-r--r--  arch/x86/kernel/setup_64.c                   |   2
-rw-r--r--  arch/x86/kvm/mmu.c                           |  18
-rw-r--r--  arch/x86/kvm/vmx.c                           |   7
-rw-r--r--  arch/x86/lguest/boot.c                       | 108
-rw-r--r--  arch/x86/lguest/i386_head.S                  |  15
-rw-r--r--  arch/x86/mach-rdc321x/gpio.c                 | 199
-rw-r--r--  arch/x86/mach-rdc321x/platform.c             |   2
-rw-r--r--  arch/x86/mach-visws/traps.c                  |   5
-rw-r--r--  arch/x86/mm/discontig_32.c                   |   1
-rw-r--r--  arch/x86/mm/fault.c                          |  10
-rw-r--r--  arch/x86/mm/highmem_32.c                     |   6
-rw-r--r--  arch/x86/mm/hugetlbpage.c                    |   2
-rw-r--r--  arch/x86/mm/ioremap.c                        |  12
-rw-r--r--  arch/x86/mm/numa_64.c                        |   3
-rw-r--r--  arch/x86/mm/pageattr.c                       |   2
-rw-r--r--  arch/x86/xen/enlighten.c                     |  47
-rw-r--r--  arch/x86/xen/xen-asm.S                       |   9
33 files changed, 530 insertions, 287 deletions
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 608152a2a05e..00df126169b4 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -18,6 +18,7 @@
 #include <linux/pci.h>
 #include <linux/bitops.h>
 #include <linux/ioport.h>
+#include <linux/suspend.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/gart.h>
@@ -76,6 +77,8 @@ static u32 __init allocate_aperture(void)
 	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
 			aper_size >> 10, __pa(p));
 	insert_aperture_resource((u32)__pa(p), aper_size);
+	register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
+				(u32)__pa(p+aper_size) >> PAGE_SHIFT);
 
 	return (u32)__pa(p);
 }
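Annotation (not part of the patch): the new register_nosave_region() call keeps the hibernation core from saving and restoring the RAM that the GART aperture is mapped over. A minimal sketch of the page-frame arithmetic, assuming p points at the allocated buffer and aper_size is in bytes:

/* The hibernation core tracks nosave regions by page frame number,
 * so the aperture's physical byte range is converted to pfns. */
unsigned long first_pfn = (u32)__pa(p) >> PAGE_SHIFT;
unsigned long last_pfn  = (u32)__pa(p + aper_size) >> PAGE_SHIFT;
register_nosave_region(first_pfn, last_pfn);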
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index f2b5a621d27b..8a85c93bd62a 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -63,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
  */
 static int speedstep_smi_ownership (void)
 {
-	u32 command, result, magic;
+	u32 command, result, magic, dummy;
 	u32 function = GET_SPEEDSTEP_OWNER;
 	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
 
@@ -73,8 +73,11 @@ static int speedstep_smi_ownership (void)
 	dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
 
 	__asm__ __volatile__(
+		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
-		: "=D" (result)
+		"pop %%ebp\n"
+		: "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+		  "=S" (dummy)
 		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
 		  "D" (0), "S" (magic)
 		: "memory"
@@ -96,7 +99,7 @@ static int speedstep_smi_ownership (void)
  */
 static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
 {
-	u32 command, result = 0, edi, high_mhz, low_mhz;
+	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
 	u32 state=0;
 	u32 function = GET_SPEEDSTEP_FREQS;
 
@@ -109,10 +112,12 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
 
 	dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
 
-	__asm__ __volatile__("movl $0, %%edi\n"
+	__asm__ __volatile__(
+		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
-		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
-		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+		"pop %%ebp"
+		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy)
+		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
 	);
 
 	dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
@@ -135,16 +140,18 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
 static int speedstep_get_state (void)
 {
 	u32 function=GET_SPEEDSTEP_STATE;
-	u32 result, state, edi, command;
+	u32 result, state, edi, command, dummy;
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
 	dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
 
-	__asm__ __volatile__("movl $0, %%edi\n"
+	__asm__ __volatile__(
+		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
-		: "=a" (result), "=b" (state), "=D" (edi)
-		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
+		"pop %%ebp\n"
+		: "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0)
 	);
 
 	dprintk("state is %x, result is %x\n", state, result);
@@ -160,7 +167,7 @@ static int speedstep_get_state (void)
  */
 static void speedstep_set_state (unsigned int state)
 {
-	unsigned int result = 0, command, new_state;
+	unsigned int result = 0, command, new_state, dummy;
 	unsigned long flags;
 	unsigned int function=SET_SPEEDSTEP_STATE;
 	unsigned int retry = 0;
@@ -182,10 +189,12 @@ static void speedstep_set_state (unsigned int state)
 		}
 		retry++;
 		__asm__ __volatile__(
-			"movl $0, %%edi\n"
+			"push %%ebp\n"
 			"out %%al, (%%dx)\n"
-			: "=b" (new_state), "=D" (result)
-			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+			"pop %%ebp"
+			: "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy),
+			  "=d" (dummy), "=S" (dummy)
+			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
 		);
 	} while ((new_state != state) && (retry <= SMI_TRIES));
191 200
@@ -195,7 +204,7 @@ static void speedstep_set_state (unsigned int state)
 	if (new_state == state) {
 		dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
 	} else {
-		printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
+		printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result);
 	}
 
 	return;
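Annotation (not part of the patch): all four SMI calls above share the same fix. The SMI handler can clobber any general-purpose register, including %ebp, which GCC may be using as the frame pointer and which cannot appear in a clobber list on 32-bit. The asm therefore saves and restores %ebp by hand and declares every other touched register as a dummy output. A self-contained sketch of the pattern, with hypothetical command, function, and port values:

/* Sketch of the push/pop + dummy-output pattern used in the fix above.
 * The meanings of command, function, and port mirror the driver code. */
static int smi_call_sketch(u32 command, u32 function, u32 port)
{
	u32 result, dummy;

	__asm__ __volatile__(
		"push %%ebp\n"		/* SMI may trash %ebp; save it */
		"out %%al, (%%dx)\n"	/* trap into the SMI handler */
		"pop %%ebp\n"		/* restore the frame pointer */
		: "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy),
		  "=d" (dummy), "=S" (dummy)
		: "a" (command), "b" (function), "c" (0), "d" (port),
		  "D" (0), "S" (0)
		: "memory");

	return result;
}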
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 103d61a59b19..3e18db4cefee 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -176,12 +176,13 @@ static inline void k8_enable_fixed_iorrs(void)
 }
 
 /**
- * Checks and updates an fixed-range MTRR if it differs from the value it
- * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also.
- * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
- * \param msr MSR address of the MTTR which should be checked and updated
- * \param changed pointer which indicates whether the MTRR needed to be changed
- * \param msrwords pointer to the MSR values which the MSR should have
+ * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
+ * @msr: MSR address of the MTTR which should be checked and updated
+ * @changed: pointer which indicates whether the MTRR needed to be changed
+ * @msrwords: pointer to the MSR values which the MSR should have
+ *
+ * If K8 extentions are wanted, update the K8 SYSCFG MSR also.
+ * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
  */
 static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 {
@@ -199,12 +200,15 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 	}
 }
 
+/**
+ * generic_get_free_region - Get a free MTRR.
+ * @base: The starting (base) address of the region.
+ * @size: The size (in bytes) of the region.
+ * @replace_reg: mtrr index to be replaced; set to invalid value if none.
+ *
+ * Returns: The index of the region on success, else negative on error.
+ */
 int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
-/*  [SUMMARY] Get a free MTRR.
-    <base> The starting (base) address of the region.
-    <size> The size (in bytes) of the region.
-    [RETURNS] The index of the region on success, else -1 on error.
-*/
 {
 	int i, max;
 	mtrr_type ltype;
@@ -249,8 +253,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 }
 
 /**
- * Checks and updates the fixed-range MTRRs if they differ from the saved set
- * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+ * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set
+ * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
  */
 static int set_fixed_ranges(mtrr_type * frs)
 {
@@ -294,13 +298,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
 
 static u32 deftype_lo, deftype_hi;
 
+/**
+ * set_mtrr_state - Set the MTRR state for this CPU.
+ *
+ * NOTE: The CPU must already be in a safe state for MTRR changes.
+ * RETURNS: 0 if no changes made, else a mask indicating what was changed.
+ */
 static unsigned long set_mtrr_state(void)
-/*  [SUMMARY] Set the MTRR state for this CPU.
-    <state> The MTRR state information to read.
-    <ctxt> Some relevant CPU context.
-    [NOTE] The CPU must already be in a safe state for MTRR changes.
-    [RETURNS] 0 if no changes made, else a mask indication what was changed.
-*/
 {
 	unsigned int i;
 	unsigned long change_mask = 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index be83336fddba..a6450b3ae759 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,7 +711,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 		trim_size = end_pfn;
 		trim_size <<= PAGE_SHIFT;
 		trim_size -= trim_start;
-		add_memory_region(trim_start, trim_size, E820_RESERVED);
+		update_memory_range(trim_start, trim_size, E820_RAM,
+			E820_RESERVED);
 		update_e820();
 		return 1;
 	}
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 4e16ef4a2659..80444c5c9b14 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -749,6 +749,32 @@ static int __init parse_memmap(char *arg)
 	return 0;
 }
 early_param("memmap", parse_memmap);
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type)
+{
+	int i;
+
+	BUG_ON(old_type == new_type);
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 final_start, final_end;
+		if (ei->type != old_type)
+			continue;
+		/* totally covered? */
+		if (ei->addr >= start && ei->size <= size) {
+			ei->type = new_type;
+			continue;
+		}
+		/* partially covered */
+		final_start = max(start, ei->addr);
+		final_end = min(start + size, ei->addr + ei->size);
+		if (final_start >= final_end)
+			continue;
+		add_memory_region(final_start, final_end - final_start,
+					 new_type);
+	}
+}
 void __init update_e820(void)
 {
 	u8 nr_map;
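Annotation (not part of the patch): a worked example of update_memory_range() with made-up addresses. Given an E820_RAM entry covering [0x100000, 0x500000) and a call retyping [0x300000, 0x800000) to E820_RESERVED, the entry is not totally covered (its start lies below the range), so the partial branch runs:

/* Hypothetical numbers illustrating the partial-coverage branch above. */
u64 start = 0x300000, size = 0x500000;	/* range to retype, ends 0x800000 */
u64 eaddr = 0x100000, esize = 0x400000;	/* existing entry, ends 0x500000 */
u64 final_start = max(start, eaddr);			/* = 0x300000 */
u64 final_end = min(start + size, eaddr + esize);	/* = 0x500000 */
/* add_memory_region(0x300000, 0x200000, E820_RESERVED) appends the
 * overlapping piece; the caller's subsequent update_e820() re-sanitizes
 * the map so the reserved type wins over the overlapped RAM. */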
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 9f65b4cc323c..9be697126013 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -744,6 +744,33 @@ void __init finish_e820_parsing(void)
 	}
 }
 
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type)
+{
+	int i;
+
+	BUG_ON(old_type == new_type);
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 final_start, final_end;
+		if (ei->type != old_type)
+			continue;
+		/* totally covered? */
+		if (ei->addr >= start && ei->size <= size) {
+			ei->type = new_type;
+			continue;
+		}
+		/* partially covered */
+		final_start = max(start, ei->addr);
+		final_end = min(start + size, ei->addr + ei->size);
+		if (final_start >= final_end)
+			continue;
+		add_memory_region(final_start, final_end - final_start,
+					 new_type);
+	}
+}
+
 void __init update_e820(void)
 {
 	u8 nr_map;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fd8ca53943a8..74d87ea85b5c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -657,7 +657,7 @@ int_msg:
 	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
 
 fault_msg:
-	.ascii \
+	.asciz \
 /* fault info: */	"BUG: Int %d: CR2 %p\n" \
 /* pusha regs: */	"     EDI %p  ESI %p  EBP %p  ESP %p\n" \
 			"     EBX %p  EDX %p  ECX %p  EAX %p\n" \
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index c706a3061553..5921e5f0a640 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -78,6 +78,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
 	},
 	{
 		.callback	= dmi_io_delay_0xed_port,
+		.ident		= "HP Pavilion dv6000",
+		.matches	= {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+			DMI_MATCH(DMI_BOARD_NAME, "30B8")
+		}
+	},
+	{
+		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "HP Pavilion tx1000",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
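Annotation (not part of the patch): the table is consulted once at boot; every entry whose DMI_MATCH fields all match the machine's DMI strings has its callback run, so the new dv6000 entry switches that board from port 0x80 to port 0xed for I/O delays. A sketch of the consultation step, assuming the usual dmi_check_system() helper:

/* Hypothetical init hook: walk the quirk table and fire matching
 * callbacks (dmi_io_delay_0xed_port for the boards listed above). */
static int __init io_delay_dmi_sketch(void)
{
	dmi_check_system(io_delay_0xed_port_dmi_table);
	return 0;
}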
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 236d2f8f7ddc..576a03db4511 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -233,6 +233,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 
 void arch_crash_save_vmcoreinfo(void)
 {
+	VMCOREINFO_SYMBOL(phys_base);
 	VMCOREINFO_SYMBOL(init_level4_pgt);
 
 #ifdef CONFIG_NUMA
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 027fc067b399..b402c0f3f192 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -30,6 +30,7 @@
 
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 #include <asm/geode.h>
 
 static struct mfgpt_timer_t {
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index a82473d192a3..375cb2bc45be 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -53,11 +53,6 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
 	int node;
 
 	node = dev_to_node(dev);
-	if (node == -1)
-		node = numa_node_id();
-
-	if (node < first_node(node_online_map))
-		node = first_node(node_online_map);
 
 	page = alloc_pages_node(node, gfp, order);
 	return page ? page_address(page) : NULL;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d5904eef1d31..eb92ccbb3502 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -600,21 +600,6 @@ static int ptrace_bts_read_record(struct task_struct *child,
 	return sizeof(ret);
 }
 
-static int ptrace_bts_write_record(struct task_struct *child,
-				   const struct bts_struct *in)
-{
-	int retval;
-
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
-
-	retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
-	if (retval)
-		return retval;
-
-	return sizeof(*in);
-}
-
 static int ptrace_bts_clear(struct task_struct *child)
 {
 	if (!child->thread.ds_area_msr)
@@ -657,75 +642,6 @@ static int ptrace_bts_drain(struct task_struct *child,
 	return end;
 }
 
-static int ptrace_bts_realloc(struct task_struct *child,
-			      int size, int reduce_size)
-{
-	unsigned long rlim, vm;
-	int ret, old_size;
-
-	if (size < 0)
-		return -EINVAL;
-
-	old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
-	if (old_size < 0)
-		return old_size;
-
-	ret = ds_free((void **)&child->thread.ds_area_msr);
-	if (ret < 0)
-		goto out;
-
-	size >>= PAGE_SHIFT;
-	old_size >>= PAGE_SHIFT;
-
-	current->mm->total_vm  -= old_size;
-	current->mm->locked_vm -= old_size;
-
-	if (size == 0)
-		goto out;
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->total_vm + size;
-	if (rlim < vm) {
-		ret = -ENOMEM;
-
-		if (!reduce_size)
-			goto out;
-
-		size = rlim - current->mm->total_vm;
-		if (size <= 0)
-			goto out;
-	}
-
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->locked_vm + size;
-	if (rlim < vm) {
-		ret = -ENOMEM;
-
-		if (!reduce_size)
-			goto out;
-
-		size = rlim - current->mm->locked_vm;
-		if (size <= 0)
-			goto out;
-	}
-
-	ret = ds_allocate((void **)&child->thread.ds_area_msr,
-			  size << PAGE_SHIFT);
-	if (ret < 0)
-		goto out;
-
-	current->mm->total_vm  += size;
-	current->mm->locked_vm += size;
-
-out:
-	if (child->thread.ds_area_msr)
-		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-	else
-		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
-	return ret;
-}
-
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
@@ -828,6 +744,91 @@ static int ptrace_bts_status(struct task_struct *child,
 	return sizeof(cfg);
 }
 
+
+static int ptrace_bts_write_record(struct task_struct *child,
+				   const struct bts_struct *in)
+{
+	int retval;
+
+	if (!child->thread.ds_area_msr)
+		return -ENXIO;
+
+	retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
+	if (retval)
+		return retval;
+
+	return sizeof(*in);
+}
+
+static int ptrace_bts_realloc(struct task_struct *child,
+			      int size, int reduce_size)
+{
+	unsigned long rlim, vm;
+	int ret, old_size;
+
+	if (size < 0)
+		return -EINVAL;
+
+	old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
+	if (old_size < 0)
+		return old_size;
+
+	ret = ds_free((void **)&child->thread.ds_area_msr);
+	if (ret < 0)
+		goto out;
+
+	size >>= PAGE_SHIFT;
+	old_size >>= PAGE_SHIFT;
+
+	current->mm->total_vm  -= old_size;
+	current->mm->locked_vm -= old_size;
+
+	if (size == 0)
+		goto out;
+
+	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm = current->mm->total_vm + size;
+	if (rlim < vm) {
+		ret = -ENOMEM;
+
+		if (!reduce_size)
+			goto out;
+
+		size = rlim - current->mm->total_vm;
+		if (size <= 0)
+			goto out;
+	}
+
+	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm = current->mm->locked_vm + size;
+	if (rlim < vm) {
+		ret = -ENOMEM;
+
+		if (!reduce_size)
+			goto out;
+
+		size = rlim - current->mm->locked_vm;
+		if (size <= 0)
+			goto out;
+	}
+
+	ret = ds_allocate((void **)&child->thread.ds_area_msr,
+			  size << PAGE_SHIFT);
+	if (ret < 0)
+		goto out;
+
+	current->mm->total_vm  += size;
+	current->mm->locked_vm += size;
+
+out:
+	if (child->thread.ds_area_msr)
+		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+	else
+		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+	return ret;
+}
+
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
 			       enum bts_qualifier qualifier)
 {
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index c47208fc5932..d89a648fe710 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -363,6 +363,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051,
 						nvidia_force_enable_hpet);
 
 /* LPC bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0260,
+						nvidia_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360,
 						nvidia_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 55ceb8cdef75..484c4a80d38a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -152,6 +152,24 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
 		},
 	},
+	{	/* Handle problems with rebooting on Dell Optiplex 745's DFF */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 745",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+			DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
+		},
+	},
+	{	/* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 745",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+			DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
+		},
+	},
 	{	/* Handle problems with rebooting on Dell 2400's */
 		.callback = set_bios_reboot,
 		.ident = "Dell PowerEdge 2400",
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 309366f8f603..e24c45677094 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -142,14 +142,16 @@ void __init setup_per_cpu_areas(void)
 	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
 	for_each_cpu_mask (i, cpu_possible_map) {
 		char *ptr;
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+		ptr = alloc_bootmem_pages(size);
+#else
+		int node = early_cpu_to_node(i);
 
-		if (!NODE_DATA(early_cpu_to_node(i))) {
-			printk("cpu with no node %d, num_online_nodes %d\n",
-			       i, num_online_nodes());
+		if (!node_online(node) || !NODE_DATA(node))
 			ptr = alloc_bootmem_pages(size);
-		} else {
-			ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
-		}
+		else
+			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+#endif
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
 		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index a1d7071a51c9..2b3e5d45176b 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -406,8 +406,6 @@ static unsigned long __init setup_memory(void)
 	 */
 	min_low_pfn = PFN_UP(init_pg_tables_end);
 
-	find_max_pfn();
-
 	max_low_pfn = find_max_low_pfn();
 
 #ifdef CONFIG_HIGHMEM
@@ -764,12 +762,13 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled)
 		efi_init();
 
-	max_low_pfn = setup_memory();
-
 	/* update e820 for memory not covered by WB MTRRs */
+	find_max_pfn();
 	mtrr_bp_init();
 	if (mtrr_trim_uncached_memory(max_pfn))
-		max_low_pfn = setup_memory();
+		find_max_pfn();
+
+	max_low_pfn = setup_memory();
 
 #ifdef CONFIG_VMI
 	/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 7637dc91c79b..f4f7ecfb898c 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -801,7 +801,7 @@ static void __cpuinit srat_detect_node(void)
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
 	node = apicid_to_node[apicid];
-	if (node == NUMA_NO_NODE)
+	if (node == NUMA_NO_NODE || !node_online(node))
 		node = first_node(node_online_map);
 	numa_set_node(cpu, node);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d8172aabc660..e55af12e11b7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -222,8 +222,7 @@ static int is_io_pte(unsigned long pte)
 
 static int is_rmap_pte(u64 pte)
 {
-	return pte != shadow_trap_nonpresent_pte
-		&& pte != shadow_notrap_nonpresent_pte;
+	return is_shadow_present_pte(pte);
 }
 
 static gfn_t pse36_gfn_delta(u32 gpte)
@@ -893,14 +892,25 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 int *ptwrite, gfn_t gfn, struct page *page)
 {
 	u64 spte;
-	int was_rmapped = is_rmap_pte(*shadow_pte);
+	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
+	hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
 		 __FUNCTION__, *shadow_pte, pt_access,
 		 write_fault, user_fault, gfn);
 
+	if (is_rmap_pte(*shadow_pte)) {
+		if (host_pfn != page_to_pfn(page)) {
+			pgprintk("hfn old %lx new %lx\n",
+				 host_pfn, page_to_pfn(page));
+			rmap_remove(vcpu->kvm, shadow_pte);
+		}
+		else
+			was_rmapped = 1;
+	}
+
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
 	 * whether the guest actually used the pte (in order to detect
@@ -1402,7 +1412,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	up_read(&current->mm->mmap_sem);
 
 	vcpu->arch.update_pte.gfn = gfn;
-	vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
+	vcpu->arch.update_pte.page = page;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 94ea724638fd..8e1462880d1f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -349,8 +349,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
 static void reload_tss(void)
 {
-#ifndef CONFIG_X86_64
-
 	/*
 	 * VT restores TR but not its size. Useless.
 	 */
@@ -361,7 +359,6 @@ static void reload_tss(void)
 	descs = (void *)gdt.base;
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
-#endif
 }
 
 static void load_transition_efer(struct vcpu_vmx *vmx)
@@ -1436,7 +1433,7 @@ static int init_rmode_tss(struct kvm *kvm)
 	int ret = 0;
 	int r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&kvm->slots_lock);
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -1459,7 +1456,7 @@ static int init_rmode_tss(struct kvm *kvm)
 
 	ret = 1;
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&kvm->slots_lock);
 	return ret;
 }
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a104c532ff70..3335b4595efd 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -10,21 +10,19 @@
  * (such as the example in Documentation/lguest/lguest.c) is called the
  * Launcher.
  *
- * Secondly, we only run specially modified Guests, not normal kernels.  When
- * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets
- * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows
- * how to be a Guest.  This means that you can use the same kernel you boot
- * normally (ie. as a Host) as a Guest.
+ * Secondly, we only run specially modified Guests, not normal kernels: setting
+ * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows
+ * how to be a Guest at boot time.  This means that you can use the same kernel
+ * you boot normally (ie. as a Host) as a Guest.
  *
  * These Guests know that they cannot do privileged operations, such as disable
  * interrupts, and that they have to ask the Host to do such things explicitly.
  * This file consists of all the replacements for such low-level native
  * hardware operations: these special Guest versions call the Host.
  *
- * So how does the kernel know it's a Guest?  The Guest starts at a special
- * entry point marked with a magic string, which sets up a few things then
- * calls here.  We replace the native functions various "paravirt" structures
- * with our Guest versions, then boot like normal. :*/
+ * So how does the kernel know it's a Guest?  We'll see that later, but let's
+ * just say that we end up here where we replace the native functions various
+ * "paravirt" structures with our Guest versions, then boot like normal. :*/
 
 /*
  * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
@@ -134,7 +132,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
  * lguest_leave_lazy_mode().
  *
  * So, when we're in lazy mode, we call async_hcall() to store the call for
- * future processing. */
+ * future processing: */
 static void lazy_hcall(unsigned long call,
 		       unsigned long arg1,
 		       unsigned long arg2,
@@ -147,7 +145,7 @@ static void lazy_hcall(unsigned long call,
 }
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
- * issue a hypercall to flush any stored calls. */
+ * issue the do-nothing hypercall to flush any stored calls. */
 static void lguest_leave_lazy_mode(void)
 {
 	paravirt_leave_lazy(paravirt_get_lazy_mode());
@@ -164,7 +162,7 @@ static void lguest_leave_lazy_mode(void)
  *
  * So instead we keep an "irq_enabled" field inside our "struct lguest_data",
  * which the Guest can update with a single instruction.  The Host knows to
- * check there when it wants to deliver an interrupt.
+ * check there before it tries to deliver an interrupt.
  */
 
 /* save_flags() is expected to return the processor state (ie. "flags").  The
@@ -196,10 +194,15 @@ static void irq_enable(void)
 /*M:003 Note that we don't check for outstanding interrupts when we re-enable
  * them (or when we unmask an interrupt).  This seems to work for the moment,
  * since interrupts are rare and we'll just get the interrupt on the next timer
- * tick, but when we turn on CONFIG_NO_HZ, we should revisit this.  One way
+ * tick, but now we can run with CONFIG_NO_HZ, we should revisit this.  One way
  * would be to put the "irq_enabled" field in a page by itself, and have the
  * Host write-protect it when an interrupt comes in when irqs are disabled.
- * There will then be a page fault as soon as interrupts are re-enabled. :*/
+ * There will then be a page fault as soon as interrupts are re-enabled.
+ *
+ * A better method is to implement soft interrupt disable generally for x86:
+ * instead of disabling interrupts, we set a flag.  If an interrupt does come
+ * in, we then disable them for real.  This is uncommon, so we could simply use
+ * a hypercall for interrupt control and not worry about efficiency. :*/
 
 /*G:034
  * The Interrupt Descriptor Table (IDT).
@@ -212,6 +215,10 @@ static void irq_enable(void)
 static void lguest_write_idt_entry(gate_desc *dt,
 				   int entrynum, const gate_desc *g)
 {
+	/* The gate_desc structure is 8 bytes long: we hand it to the Host in
+	 * two 32-bit chunks.  The whole 32-bit kernel used to hand descriptors
+	 * around like this; typesafety wasn't a big concern in Linux's early
+	 * years. */
 	u32 *desc = (u32 *)g;
 	/* Keep the local copy up to date. */
 	native_write_idt_entry(dt, entrynum, g);
@@ -243,7 +250,8 @@ static void lguest_load_idt(const struct desc_ptr *desc)
  *
  * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
  * hypercall and use that repeatedly to load a new IDT.  I don't think it
- * really matters, but wouldn't it be nice if they were the same?
+ * really matters, but wouldn't it be nice if they were the same?  Wouldn't
+ * it be even better if you were the one to send the patch to fix it?
  */
 static void lguest_load_gdt(const struct desc_ptr *desc)
 {
@@ -298,9 +306,9 @@ static void lguest_load_tr_desc(void)
 
 /* The "cpuid" instruction is a way of querying both the CPU identity
  * (manufacturer, model, etc) and its features.  It was introduced before the
- * Pentium in 1993 and keeps getting extended by both Intel and AMD.  As you
- * might imagine, after a decade and a half this treatment, it is now a giant
- * ball of hair.  Its entry in the current Intel manual runs to 28 pages.
+ * Pentium in 1993 and keeps getting extended by both Intel, AMD and others.
+ * As you might imagine, after a decade and a half this treatment, it is now a
+ * giant ball of hair.  Its entry in the current Intel manual runs to 28 pages.
  *
  * This instruction even it has its own Wikipedia entry.  The Wikipedia entry
  * has been translated into 4 languages.  I am not making this up!
@@ -594,17 +602,17 @@ static unsigned long lguest_get_wallclock(void)
 	return lguest_data.time.tv_sec;
 }
 
-/* The TSC is a Time Stamp Counter.  The Host tells us what speed it runs at,
- * or 0 if it's unusable as a reliable clock source.  This matches what we want
- * here: if we return 0 from this function, the x86 TSC clock will not register
- * itself. */
+/* The TSC is an Intel thing called the Time Stamp Counter.  The Host tells us
+ * what speed it runs at, or 0 if it's unusable as a reliable clock source.
+ * This matches what we want here: if we return 0 from this function, the x86
+ * TSC clock will give up and not register itself. */
 static unsigned long lguest_cpu_khz(void)
 {
 	return lguest_data.tsc_khz;
 }
 
-/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
- * we read the time value given to us by the Host. */
+/* If we can't use the TSC, the kernel falls back to our lower-priority
+ * "lguest_clock", where we read the time value given to us by the Host. */
 static cycle_t lguest_clock_read(void)
 {
 	unsigned long sec, nsec;
@@ -648,12 +656,16 @@ static struct clocksource lguest_clock = {
 static int lguest_clockevent_set_next_event(unsigned long delta,
 					    struct clock_event_device *evt)
 {
+	/* FIXME: I don't think this can ever happen, but James tells me he had
+	 * to put this code in.  Maybe we should remove it now.  Anyone? */
 	if (delta < LG_CLOCK_MIN_DELTA) {
 		if (printk_ratelimit())
 			printk(KERN_DEBUG "%s: small delta %lu ns\n",
 			       __FUNCTION__, delta);
 		return -ETIME;
 	}
+
+	/* Please wake us this far in the future. */
 	hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0);
 	return 0;
 }
@@ -738,7 +750,7 @@ static void lguest_time_init(void)
  * will not tolerate us trying to use that), the stack pointer, and the number
  * of pages in the stack. */
 static void lguest_load_sp0(struct tss_struct *tss,
-			     struct thread_struct *thread)
+			    struct thread_struct *thread)
 {
 	lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0,
 		   THREAD_SIZE/PAGE_SIZE);
@@ -786,9 +798,8 @@ static void lguest_safe_halt(void)
 	hcall(LHCALL_HALT, 0, 0, 0);
 }
 
-/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a
- * message out when we're crashing as well as elegant termination like powering
- * off.
+/* The SHUTDOWN hypercall takes a string to describe what's happening, and
+ * an argument which says whether this to restart (reboot) the Guest or not.
  *
  * Note that the Host always prefers that the Guest speak in physical addresses
  * rather than virtual addresses, so we use __pa() here. */
@@ -816,8 +827,9 @@ static struct notifier_block paniced = {
 /* Setting up memory is fairly easy. */
 static __init char *lguest_memory_setup(void)
 {
-	/* We do this here and not earlier because lockcheck barfs if we do it
-	 * before start_kernel() */
+	/* We do this here and not earlier because lockcheck used to barf if we
+	 * did it before start_kernel().  I think we fixed that, so it'd be
+	 * nice to move it back to lguest_init. Patch welcome... */
 	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
 
 	/* The Linux bootloader header contains an "e820" memory map: the
@@ -850,12 +862,19 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
 	return len;
 }
 
+/* Rebooting also tells the Host we're finished, but the RESTART flag tells the
+ * Launcher to reboot us. */
+static void lguest_restart(char *reason)
+{
+	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
+}
+
 /*G:050
  * Patching (Powerfully Placating Performance Pedants)
  *
- * We have already seen that pv_ops structures let us replace simple
- * native instructions with calls to the appropriate back end all throughout
- * the kernel.  This allows the same kernel to run as a Guest and as a native
+ * We have already seen that pv_ops structures let us replace simple native
+ * instructions with calls to the appropriate back end all throughout the
+ * kernel.  This allows the same kernel to run as a Guest and as a native
  * kernel, but it's slow because of all the indirect branches.
 *
 * Remember that David Wheeler quote about "Any problem in computer science can
@@ -908,14 +927,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
 	return insn_len;
 }
 
-static void lguest_restart(char *reason)
-{
-	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
-}
-
-/*G:030 Once we get to lguest_init(), we know we're a Guest.  The pv_ops
- * structures in the kernel provide points for (almost) every routine we have
- * to override to avoid privileged instructions. */
+/*G:030 Once we get to lguest_init(), we know we're a Guest.  The various
+ * pv_ops structures in the kernel provide points for (almost) every routine we
+ * have to override to avoid privileged instructions. */
 __init void lguest_init(void)
 {
 	/* We're under lguest, paravirt is enabled, and we're running at
@@ -1003,9 +1017,9 @@ __init void lguest_init(void)
 	 * the normal data segment to get through booting. */
 	asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
 
-	/* The Host uses the top of the Guest's virtual address space for the
-	 * Host<->Guest Switcher, and it tells us how big that is in
-	 * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
+	/* The Host<->Guest Switcher lives at the top of our address space, and
+	 * the Host told us how big it is when we made LGUEST_INIT hypercall:
+	 * it put the answer in lguest_data.reserve_mem  */
 	reserve_top_address(lguest_data.reserve_mem);
 
 	/* If we don't initialize the lock dependency checker now, it crashes
@@ -1027,6 +1041,7 @@ __init void lguest_init(void)
 	/* Math is always hard! */
 	new_cpu_data.hard_math = 1;
 
+	/* We don't have features.  We have puppies!  Puppies! */
 #ifdef CONFIG_X86_MCE
 	mce_disabled = 1;
 #endif
@@ -1044,10 +1059,11 @@ __init void lguest_init(void)
 	virtio_cons_early_init(early_put_chars);
 
 	/* Last of all, we set the power management poweroff hook to point to
-	 * the Guest routine to power off. */
+	 * the Guest routine to power off, and the reboot hook to our restart
+	 * routine. */
 	pm_power_off = lguest_power_off;
-
 	machine_ops.restart = lguest_restart;
+
 	/* Now we're set up, call start_kernel() in init/main.c and we proceed
 	 * to boot as normal.  It never returns. */
 	start_kernel();
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 95b6fbcded63..5c7cef34c9e7 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -5,13 +5,20 @@
 #include <asm/thread_info.h>
 #include <asm/processor-flags.h>
 
-/*G:020 This is where we begin: head.S notes that the boot header's platform
- * type field is "1" (lguest), so calls us here.
+/*G:020 Our story starts with the kernel booting into startup_32 in
+ * arch/x86/kernel/head_32.S.  It expects a boot header, which is created by
+ * the bootloader (the Launcher in our case).
+ *
+ * The startup_32 function does very little: it clears the uninitialized global
+ * C variables which we expect to be zero (ie. BSS) and then copies the boot
+ * header and kernel command line somewhere safe.  Finally it checks the
+ * 'hardware_subarch' field.  This was introduced in 2.6.24 for lguest and Xen:
+ * if it's set to '1' (lguest's assigned number), then it calls us here.
  *
  * WARNING: be very careful here!  We're running at addresses equal to physical
  * addesses (around 0), not above PAGE_OFFSET as most code expectes
  * (eg. 0xC0000000).  Jumps are relative, so they're OK, but we can't touch any
- * data.
+ * data without remembering to subtract __PAGE_OFFSET!
  *
  * The .section line puts this code in .init.text so it will be discarded after
  * boot. */
@@ -24,7 +31,7 @@ ENTRY(lguest_entry)
 	int $LGUEST_TRAP_ENTRY
 
 	/* The Host put the toplevel pagetable in lguest_data.pgdir.  The movsl
-	 * instruction uses %esi implicitly as the source for the copy we'
+	 * instruction uses %esi implicitly as the source for the copy we're
 	 * about to do. */
 	movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi
 
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c
index 031269163bd6..247f33d3a407 100644
--- a/arch/x86/mach-rdc321x/gpio.c
+++ b/arch/x86/mach-rdc321x/gpio.c
@@ -1,91 +1,194 @@
1/* 1/*
2 * Copyright (C) 2007, OpenWrt.org, Florian Fainelli <florian@openwrt.org> 2 * GPIO support for RDC SoC R3210/R8610
3 * RDC321x architecture specific GPIO support 3 *
4 * Copyright (C) 2007, Florian Fainelli <florian@openwrt.org>
5 * Copyright (C) 2008, Volker Weiss <dev@tintuc.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
4 * 20 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version.
9 */ 21 */
10 22
11#include <linux/autoconf.h> 23
12#include <linux/init.h> 24#include <linux/spinlock.h>
13#include <linux/io.h> 25#include <linux/io.h>
14#include <linux/types.h> 26#include <linux/types.h>
15#include <linux/module.h> 27#include <linux/module.h>
16#include <linux/delay.h>
17 28
29#include <asm/gpio.h>
18#include <asm/mach-rdc321x/rdc321x_defs.h> 30#include <asm/mach-rdc321x/rdc321x_defs.h>
19 31
20static inline int rdc_gpio_is_valid(unsigned gpio) 32
33/* spin lock to protect our private copy of GPIO data register plus
34 the access to PCI conf registers. */
35static DEFINE_SPINLOCK(gpio_lock);
36
37/* copy of GPIO data registers */
38static u32 gpio_data_reg1;
39static u32 gpio_data_reg2;
40
41static u32 gpio_request_data[2];
42
43
44static inline void rdc321x_conf_write(unsigned addr, u32 value)
21{ 45{
22 return (gpio <= RDC_MAX_GPIO); 46 outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
47 outl(value, RDC3210_CFGREG_DATA);
23} 48}
24 49
25static unsigned int rdc_gpio_read(unsigned gpio) 50static inline void rdc321x_conf_or(unsigned addr, u32 value)
26{ 51{
27 unsigned int val; 52 outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
28 53 value |= inl(RDC3210_CFGREG_DATA);
29 val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x84:0x48)); 54 outl(value, RDC3210_CFGREG_DATA);
30 outl(val, RDC3210_CFGREG_ADDR);
31 udelay(10);
32 val = inl(RDC3210_CFGREG_DATA);
33 val |= (0x1 << (gpio & 0x1F));
34 outl(val, RDC3210_CFGREG_DATA);
35 udelay(10);
36 val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x88:0x4C));
37 outl(val, RDC3210_CFGREG_ADDR);
38 udelay(10);
39 val = inl(RDC3210_CFGREG_DATA);
40
41 return val;
42} 55}
43 56
44static void rdc_gpio_write(unsigned int val) 57static inline u32 rdc321x_conf_read(unsigned addr)
45{ 58{
46 if (val) { 59 outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
47 outl(val, RDC3210_CFGREG_DATA); 60
48 udelay(10); 61 return inl(RDC3210_CFGREG_DATA);
49 }
50} 62}
51 63
52int rdc_gpio_get_value(unsigned gpio) 64/* configure pin as GPIO */
65static void rdc321x_configure_gpio(unsigned gpio)
66{
67 unsigned long flags;
68
69 spin_lock_irqsave(&gpio_lock, flags);
70 rdc321x_conf_or(gpio < 32
71 ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2,
72 1 << (gpio & 0x1f));
73 spin_unlock_irqrestore(&gpio_lock, flags);
74}
75
76/* initially setup the 2 copies of the gpio data registers.
77 This function must be called by the platform setup code. */
78void __init rdc321x_gpio_setup()
79{
80 /* this might not be, what others (BIOS, bootloader, etc.)
81 wrote to these registers before, but it's a good guess. Still
82 better than just using 0xffffffff. */
83
84 gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1);
85 gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2);
86}
87
88/* determine, if gpio number is valid */
89static inline int rdc321x_is_gpio(unsigned gpio)
90{
91 return gpio <= RDC321X_MAX_GPIO;
92}
93
94/* request GPIO */
95int rdc_gpio_request(unsigned gpio, const char *label)
53{ 96{
54 if (rdc_gpio_is_valid(gpio)) 97 unsigned long flags;
55 return (int)rdc_gpio_read(gpio); 98
56 else 99 if (!rdc321x_is_gpio(gpio))
57 return -EINVAL; 100 return -EINVAL;
101
102 spin_lock_irqsave(&gpio_lock, flags);
103 if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f)))
104 goto inuse;
105 gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f));
106 spin_unlock_irqrestore(&gpio_lock, flags);
107
108 return 0;
109inuse:
110 spin_unlock_irqrestore(&gpio_lock, flags);
111 return -EINVAL;
58} 112}
59EXPORT_SYMBOL(rdc_gpio_get_value); 113EXPORT_SYMBOL(rdc_gpio_request);
60 114
61void rdc_gpio_set_value(unsigned gpio, int value) 115/* release previously-claimed GPIO */
116void rdc_gpio_free(unsigned gpio)
62{ 117{
63 unsigned int val; 118 unsigned long flags;
64 119
65 if (!rdc_gpio_is_valid(gpio)) 120 if (!rdc321x_is_gpio(gpio))
66 return; 121 return;
67 122
68 val = rdc_gpio_read(gpio); 123 spin_lock_irqsave(&gpio_lock, flags);
124 gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f));
125 spin_unlock_irqrestore(&gpio_lock, flags);
126}
127EXPORT_SYMBOL(rdc_gpio_free);
128
129/* read GPIO pin */
130int rdc_gpio_get_value(unsigned gpio)
131{
132 u32 reg;
133 unsigned long flags;
134
135 spin_lock_irqsave(&gpio_lock, flags);
136 reg = rdc321x_conf_read(gpio < 32
137 ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2);
138 spin_unlock_irqrestore(&gpio_lock, flags);
69 139
70 if (value) 140 return (1 << (gpio & 0x1f)) & reg ? 1 : 0;
71 val &= ~(0x1 << (gpio & 0x1F)); 141}
72 else 142EXPORT_SYMBOL(rdc_gpio_get_value);
73 val |= (0x1 << (gpio & 0x1F));
74 143
75 rdc_gpio_write(val); 144/* set GPIO pin to value */
145void rdc_gpio_set_value(unsigned gpio, int value)
146{
147 unsigned long flags;
148 u32 reg;
149
150 reg = 1 << (gpio & 0x1f);
151 if (gpio < 32) {
152 spin_lock_irqsave(&gpio_lock, flags);
153 if (value)
154 gpio_data_reg1 |= reg;
155 else
156 gpio_data_reg1 &= ~reg;
157 rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1);
158 spin_unlock_irqrestore(&gpio_lock, flags);
159 } else {
160 spin_lock_irqsave(&gpio_lock, flags);
161 if (value)
162 gpio_data_reg2 |= reg;
163 else
164 gpio_data_reg2 &= ~reg;
165 rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2);
166 spin_unlock_irqrestore(&gpio_lock, flags);
167 }
76} 168}
77EXPORT_SYMBOL(rdc_gpio_set_value); 169EXPORT_SYMBOL(rdc_gpio_set_value);
78 170
171/* configure GPIO pin as input */
79int rdc_gpio_direction_input(unsigned gpio) 172int rdc_gpio_direction_input(unsigned gpio)
80{ 173{
174 if (!rdc321x_is_gpio(gpio))
175 return -EINVAL;
176
177 rdc321x_configure_gpio(gpio);
178
81 return 0; 179 return 0;
82} 180}
83EXPORT_SYMBOL(rdc_gpio_direction_input); 181EXPORT_SYMBOL(rdc_gpio_direction_input);
84 182
183/* configure GPIO pin as output and set value */
85int rdc_gpio_direction_output(unsigned gpio, int value) 184int rdc_gpio_direction_output(unsigned gpio, int value)
86{ 185{
186 if (!rdc321x_is_gpio(gpio))
187 return -EINVAL;
188
189 gpio_set_value(gpio, value);
190 rdc321x_configure_gpio(gpio);
191
87 return 0; 192 return 0;
88} 193}
89EXPORT_SYMBOL(rdc_gpio_direction_output); 194EXPORT_SYMBOL(rdc_gpio_direction_output);
90
91
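
A minimal usage sketch for the rdc_gpio_* interface introduced above; the pin number, label string, and function name are illustrative assumptions, not part of the patch:

	/* hypothetical consumer: claim pin 15, drive it high, release it */
	static int __init example_led_init(void)
	{
		int err = rdc_gpio_request(15, "example-led");
		if (err)
			return err;	/* invalid pin or already claimed */

		/* switches the pin to GPIO mode and drives it high */
		rdc_gpio_direction_output(15, 1);

		if (!rdc_gpio_get_value(15))
			printk(KERN_WARNING "example-led: pin reads back low\n");

		rdc_gpio_free(15);	/* clear the request bit again */
		return 0;
	}

Note that rdc_gpio_direction_output() sets the value before flipping the pin into GPIO mode, so the pin never glitches through the wrong level.
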
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
index dda6024a5862..a037041817c7 100644
--- a/arch/x86/mach-rdc321x/platform.c
+++ b/arch/x86/mach-rdc321x/platform.c
@@ -62,6 +62,8 @@ static struct platform_device *rdc321x_devs[] = {
62 62
63static int __init rdc_board_setup(void) 63static int __init rdc_board_setup(void)
64{ 64{
65 rdc321x_gpio_setup();
66
65 return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs)); 67 return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs));
66} 68}
67 69
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c
index 843b67acf43b..bfac6ba10f8a 100644
--- a/arch/x86/mach-visws/traps.c
+++ b/arch/x86/mach-visws/traps.c
@@ -46,8 +46,9 @@ static __init void cobalt_init(void)
46 */ 46 */
47 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); 47 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
48 setup_local_APIC(); 48 setup_local_APIC();
49 printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n", 49 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
50 apic_read(APIC_LVR), apic_read(APIC_ID)); 50 (unsigned int)apic_read(APIC_LVR),
51 (unsigned int)apic_read(APIC_ID));
51 52
52 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); 53 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
53 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); 54 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index c394ca0720b8..8e25e06ff730 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -324,7 +324,6 @@ unsigned long __init setup_memory(void)
324 * this space and use it to adjust the boundary between ZONE_NORMAL 324 * this space and use it to adjust the boundary between ZONE_NORMAL
325 * and ZONE_HIGHMEM. 325 * and ZONE_HIGHMEM.
326 */ 326 */
327 find_max_pfn();
328 get_memcfg_numa(); 327 get_memcfg_numa();
329 328
330 kva_pages = calculate_numa_remap_pages(); 329 kva_pages = calculate_numa_remap_pages();
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fdc667422df9..ec08d8389850 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -91,12 +91,10 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
91 int prefetch = 0; 91 int prefetch = 0;
92 unsigned char *max_instr; 92 unsigned char *max_instr;
93 93
94#ifdef CONFIG_X86_32 94 /*
 95 if (!(__supported_pte_mask & _PAGE_NX)) 95 * If it was an exec (instruction fetch) fault on an NX page, then
96 return 0; 96 * do not ignore the fault:
97#endif 97 */
98
99 /* If it was a exec fault on NX page, ignore */
100 if (error_code & PF_INSTR) 98 if (error_code & PF_INSTR)
101 return 0; 99 return 0;
102 100
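
For reference, the page-fault error code bits tested in is_prefetch() decode as below; the bit layout is architectural and the names match those fault.c already uses. The instruction-fetch bit is only ever set on CPUs running with NX enabled, which is why the removed 32-bit _PAGE_NX check was redundant:

	/* x86 page-fault error code bits */
	#define PF_PROT		(1 << 0)  /* 0: not-present page, 1: protection fault */
	#define PF_WRITE	(1 << 1)  /* 0: read access, 1: write access */
	#define PF_USER		(1 << 2)  /* 0: kernel-mode, 1: user-mode access */
	#define PF_RSVD		(1 << 3)  /* reserved bit set in a paging entry */
	#define PF_INSTR	(1 << 4)  /* fault during an instruction fetch */
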
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 3d936f232704..9cf33d3ee5bc 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -73,15 +73,15 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
73{ 73{
74 enum fixed_addresses idx; 74 enum fixed_addresses idx;
75 unsigned long vaddr; 75 unsigned long vaddr;
76 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
77
78 debug_kmap_atomic_prot(type);
79 76
77 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
80 pagefault_disable(); 78 pagefault_disable();
81 79
82 if (!PageHighMem(page)) 80 if (!PageHighMem(page))
83 return page_address(page); 81 return page_address(page);
84 82
83 debug_kmap_atomic_prot(type);
84
85 idx = type + KM_TYPE_NR*smp_processor_id(); 85 idx = type + KM_TYPE_NR*smp_processor_id();
86 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 86 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
87 BUG_ON(!pte_none(*(kmap_pte-idx))); 87 BUG_ON(!pte_none(*(kmap_pte-idx)));
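
The highmem_32.c reorder is behavioral, not cosmetic; a sketch of the resulting shape of kmap_atomic_prot(), restated from the hunk:

	pagefault_disable();		/* always, even for lowmem pages:
					   do_page_fault's in_atomic() test
					   relies on it */
	if (!PageHighMem(page))
		return page_address(page);	/* lowmem: no fixmap slot used */

	debug_kmap_atomic_prot(type);	/* slot-usage check, now only on the
					   path that consumes a KM_* slot */
	/* ...then set up the FIX_KMAP fixmap pte as before... */
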
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 4fbafb4bc2f0..0b3d567e686d 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -178,7 +178,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
178 178
179 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; 179 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
180 180
181 WARN_ON(!PageCompound(page)); 181 WARN_ON(!PageHead(page));
182 182
183 return page; 183 return page;
184} 184}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 8fe576baa148..794895c6dcc9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -106,7 +106,7 @@ static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
106 * have to convert them into an offset in a page-aligned mapping, but the 106 * have to convert them into an offset in a page-aligned mapping, but the
107 * caller shouldn't need to know that small detail. 107 * caller shouldn't need to know that small detail.
108 */ 108 */
109static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, 109static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
110 enum ioremap_mode mode) 110 enum ioremap_mode mode)
111{ 111{
112 unsigned long pfn, offset, last_addr, vaddr; 112 unsigned long pfn, offset, last_addr, vaddr;
@@ -137,7 +137,11 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
137 switch (mode) { 137 switch (mode) {
138 case IOR_MODE_UNCACHED: 138 case IOR_MODE_UNCACHED:
139 default: 139 default:
140 prot = PAGE_KERNEL_NOCACHE; 140 /*
141 * FIXME: we will use UC MINUS for now, as video fb drivers
142 * depend on it. Upcoming ioremap_wc() will fix this behavior.
143 */
144 prot = PAGE_KERNEL_UC_MINUS;
141 break; 145 break;
142 case IOR_MODE_CACHED: 146 case IOR_MODE_CACHED:
143 prot = PAGE_KERNEL; 147 prot = PAGE_KERNEL;
@@ -193,13 +197,13 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
193 * 197 *
194 * Must be freed with iounmap. 198 * Must be freed with iounmap.
195 */ 199 */
196void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) 200void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
197{ 201{
198 return __ioremap(phys_addr, size, IOR_MODE_UNCACHED); 202 return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
199} 203}
200EXPORT_SYMBOL(ioremap_nocache); 204EXPORT_SYMBOL(ioremap_nocache);
201 205
202void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size) 206void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
203{ 207{
204 return __ioremap(phys_addr, size, IOR_MODE_CACHED); 208 return __ioremap(phys_addr, size, IOR_MODE_CACHED);
205} 209}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8ccfee10f5b5..16b82ad34b96 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -221,8 +221,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
221 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); 221 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
222 if (bootmap == NULL) { 222 if (bootmap == NULL) {
223 if (nodedata_phys < start || nodedata_phys >= end) 223 if (nodedata_phys < start || nodedata_phys >= end)
224 free_bootmem((unsigned long)node_data[nodeid], 224 free_bootmem(nodedata_phys, pgdat_size);
225 pgdat_size);
226 node_data[nodeid] = NULL; 225 node_data[nodeid] = NULL;
227 return; 226 return;
228 } 227 }
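
The numa_64.c hunk is an address-space fix rather than a cleanup: free_bootmem() expects a physical address in this interface, while node_data[nodeid] holds the kernel virtual mapping of the same allocation. A sketch of the invariant the fix relies on, assuming the usual __va() relationship established earlier in setup_node_bootmem(); the BUG_ON is illustrative, not in the patch:

	/* same allocation, two address spaces */
	BUG_ON(node_data[nodeid] != (pg_data_t *)__va(nodedata_phys));

	free_bootmem(nodedata_phys, pgdat_size);	/* phys, not virt */
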
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 14e48b5a94ba..7b79f6be4e7d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -771,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
771int set_memory_uc(unsigned long addr, int numpages) 771int set_memory_uc(unsigned long addr, int numpages)
772{ 772{
773 return change_page_attr_set(addr, numpages, 773 return change_page_attr_set(addr, numpages,
774 __pgprot(_PAGE_PCD | _PAGE_PWT)); 774 __pgprot(_PAGE_PCD));
775} 775}
776EXPORT_SYMBOL(set_memory_uc); 776EXPORT_SYMBOL(set_memory_uc);
777 777
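
Both this set_memory_uc() hunk and the ioremap_nocache() change above step back from strong UC to UC minus. With the default PAT MSR programming, the page-flag combinations select memory types as follows, so dropping _PAGE_PWT is exactly the UC-to-UC- move; UC- differs from UC only in that an MTRR of type WC may still override it to write-combining, which is what the framebuffer drivers mentioned in the FIXME depend on:

	/*
	 *  _PAGE_PWT  _PAGE_PCD   type with the default PAT
	 *      0          0       WB  (write-back)
	 *      1          0       WT  (write-through)
	 *      0          1       UC- (uncached, WC-overridable via MTRR)
	 *      1          1       UC  (strongly uncached)
	 */
	pgprot_t uc_minus = __pgprot(_PAGE_PCD);		/* new behavior */
	pgprot_t uc_strong = __pgprot(_PAGE_PCD | _PAGE_PWT);	/* old behavior */
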
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8b9ee27805fd..de4e6f05840b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -95,7 +95,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
95 * 95 *
96 * 0: not available, 1: available 96 * 0: not available, 1: available
97 */ 97 */
98static int have_vcpu_info_placement = 0; 98static int have_vcpu_info_placement = 1;
99 99
100static void __init xen_vcpu_setup(int cpu) 100static void __init xen_vcpu_setup(int cpu)
101{ 101{
@@ -103,6 +103,7 @@ static void __init xen_vcpu_setup(int cpu)
103 int err; 103 int err;
104 struct vcpu_info *vcpup; 104 struct vcpu_info *vcpup;
105 105
106 BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
106 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 107 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
107 108
108 if (!have_vcpu_info_placement) 109 if (!have_vcpu_info_placement)
@@ -805,33 +806,43 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
805 PFN_DOWN(__pa(xen_start_info->pt_base))); 806 PFN_DOWN(__pa(xen_start_info->pt_base)));
806} 807}
807 808
808static __init void xen_pagetable_setup_done(pgd_t *base) 809static __init void setup_shared_info(void)
809{ 810{
810 /* This will work as long as patching hasn't happened yet
811 (which it hasn't) */
812 pv_mmu_ops.alloc_pt = xen_alloc_pt;
813 pv_mmu_ops.alloc_pd = xen_alloc_pd;
814 pv_mmu_ops.release_pt = xen_release_pt;
815 pv_mmu_ops.release_pd = xen_release_pt;
816 pv_mmu_ops.set_pte = xen_set_pte;
817
818 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 811 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
812 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
813
819 /* 814 /*
820 * Create a mapping for the shared info page. 815 * Create a mapping for the shared info page.
821 * Should be set_fixmap(), but shared_info is a machine 816 * Should be set_fixmap(), but shared_info is a machine
822 * address with no corresponding pseudo-phys address. 817 * address with no corresponding pseudo-phys address.
823 */ 818 */
824 set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP), 819 set_pte_mfn(addr,
825 PFN_DOWN(xen_start_info->shared_info), 820 PFN_DOWN(xen_start_info->shared_info),
826 PAGE_KERNEL); 821 PAGE_KERNEL);
827 822
828 HYPERVISOR_shared_info = 823 HYPERVISOR_shared_info = (struct shared_info *)addr;
829 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
830
831 } else 824 } else
832 HYPERVISOR_shared_info = 825 HYPERVISOR_shared_info =
833 (struct shared_info *)__va(xen_start_info->shared_info); 826 (struct shared_info *)__va(xen_start_info->shared_info);
834 827
828#ifndef CONFIG_SMP
829 /* In UP this is as good a place as any to set up shared info */
830 xen_setup_vcpu_info_placement();
831#endif
832}
833
834static __init void xen_pagetable_setup_done(pgd_t *base)
835{
836 /* This will work as long as patching hasn't happened yet
837 (which it hasn't) */
838 pv_mmu_ops.alloc_pt = xen_alloc_pt;
839 pv_mmu_ops.alloc_pd = xen_alloc_pd;
840 pv_mmu_ops.release_pt = xen_release_pt;
841 pv_mmu_ops.release_pd = xen_release_pt;
842 pv_mmu_ops.set_pte = xen_set_pte;
843
844 setup_shared_info();
845
835 /* Actually pin the pagetable down, but we can't set PG_pinned 846 /* Actually pin the pagetable down, but we can't set PG_pinned
836 yet because the page structures don't exist yet. */ 847 yet because the page structures don't exist yet. */
837 { 848 {
@@ -1182,15 +1193,9 @@ asmlinkage void __init xen_start_kernel(void)
1182 x86_write_percpu(xen_cr3, __pa(pgd)); 1193 x86_write_percpu(xen_cr3, __pa(pgd));
1183 x86_write_percpu(xen_current_cr3, __pa(pgd)); 1194 x86_write_percpu(xen_current_cr3, __pa(pgd));
1184 1195
1185#ifdef CONFIG_SMP
1186 /* Don't do the full vcpu_info placement stuff until we have a 1196 /* Don't do the full vcpu_info placement stuff until we have a
1187 possible map. */ 1197 possible map and a non-dummy shared_info. */
1188 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1198 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
1189#else
1190 /* May as well do it now, since there's no good time to call
1191 it later on UP. */
1192 xen_setup_vcpu_info_placement();
1193#endif
1194 1199
1195 pv_info.kernel_rpl = 1; 1200 pv_info.kernel_rpl = 1;
1196 if (xen_feature(XENFEAT_supervisor_mode_kernel)) 1201 if (xen_feature(XENFEAT_supervisor_mode_kernel))
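
Reconstructed from the enlighten.c hunks, the bring-up ordering this rework enforces; this is a reading of the patch, not code taken from it:

	/*
	 * xen_start_kernel()
	 *   per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
	 *       (still points into dummy_shared_info this early)
	 *   ...
	 * xen_pagetable_setup_done()
	 *   setup_shared_info()
	 *       maps the real shared_info page, then, on !CONFIG_SMP,
	 *       calls xen_setup_vcpu_info_placement()
	 *
	 * The new BUG_ON() in xen_vcpu_setup() asserts exactly this:
	 * placement never runs while HYPERVISOR_shared_info still points
	 * at the dummy.
	 */
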
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 1a43b60c0c62..6b7190449d07 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -33,12 +33,17 @@
33 events, then enter the hypervisor to get them handled. 33 events, then enter the hypervisor to get them handled.
34 */ 34 */
35ENTRY(xen_irq_enable_direct) 35ENTRY(xen_irq_enable_direct)
36 /* Clear mask and test pending */ 36 /* Unmask events */
37 andw $0x00ff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending 37 movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
38
38 /* Preempt here doesn't matter because that will deal with 39 /* Preempt here doesn't matter because that will deal with
39 any pending interrupts. The pending check may end up being 40 any pending interrupts. The pending check may end up being
40 run on the wrong CPU, but that doesn't hurt. */ 41 run on the wrong CPU, but that doesn't hurt. */
42
43 /* Test for pending */
44 testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
41 jz 1f 45 jz 1f
46
422: call check_events 472: call check_events
431: 481:
44ENDPATCH(xen_irq_enable_direct) 49ENDPATCH(xen_irq_enable_direct)
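
A C rendition of the reworked enable path, to make the unmask-then-recheck split explicit; the vcpu_info field names are the standard Xen ones, and barrier() stands in for the instruction ordering the hand-written assembly gets for free:

	extern void check_events(void);	/* asm helper defined in xen-asm.S */

	static inline void xen_irq_enable_sketch(struct vcpu_info *v)
	{
		v->evtchn_upcall_mask = 0;	/* movb $0, ...mask */
		barrier();			/* store before the re-test */
		if (v->evtchn_upcall_pending)	/* testb $0xff, ...pending */
			check_events();		/* flush anything that
						   slipped in meanwhile */
	}

One visible effect of the split: the old andw was a read-modify-write covering both adjacent bytes, so it wrote the pending byte back; the movb/testb pair never writes pending at all, and an event the hypervisor sets concurrently cannot be clobbered from this path.
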