diff options
Diffstat (limited to 'arch/x86')
33 files changed, 502 insertions, 380 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 45d79ea890ae..5fed98ca0e1f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -65,7 +65,8 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); | |||
65 | get them easily into strings. */ | 65 | get them easily into strings. */ |
66 | asm("\t.section .rodata, \"a\"\nintelnops: " | 66 | asm("\t.section .rodata, \"a\"\nintelnops: " |
67 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | 67 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 |
68 | GENERIC_NOP7 GENERIC_NOP8); | 68 | GENERIC_NOP7 GENERIC_NOP8 |
69 | "\t.previous"); | ||
69 | extern const unsigned char intelnops[]; | 70 | extern const unsigned char intelnops[]; |
70 | static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { | 71 | static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { |
71 | NULL, | 72 | NULL, |
@@ -83,7 +84,8 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { | |||
83 | #ifdef K8_NOP1 | 84 | #ifdef K8_NOP1 |
84 | asm("\t.section .rodata, \"a\"\nk8nops: " | 85 | asm("\t.section .rodata, \"a\"\nk8nops: " |
85 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | 86 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 |
86 | K8_NOP7 K8_NOP8); | 87 | K8_NOP7 K8_NOP8 |
88 | "\t.previous"); | ||
87 | extern const unsigned char k8nops[]; | 89 | extern const unsigned char k8nops[]; |
88 | static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { | 90 | static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { |
89 | NULL, | 91 | NULL, |
@@ -101,7 +103,8 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { | |||
101 | #ifdef K7_NOP1 | 103 | #ifdef K7_NOP1 |
102 | asm("\t.section .rodata, \"a\"\nk7nops: " | 104 | asm("\t.section .rodata, \"a\"\nk7nops: " |
103 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | 105 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 |
104 | K7_NOP7 K7_NOP8); | 106 | K7_NOP7 K7_NOP8 |
107 | "\t.previous"); | ||
105 | extern const unsigned char k7nops[]; | 108 | extern const unsigned char k7nops[]; |
106 | static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { | 109 | static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { |
107 | NULL, | 110 | NULL, |
@@ -119,7 +122,8 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { | |||
119 | #ifdef P6_NOP1 | 122 | #ifdef P6_NOP1 |
120 | asm("\t.section .rodata, \"a\"\np6nops: " | 123 | asm("\t.section .rodata, \"a\"\np6nops: " |
121 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 | 124 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 |
122 | P6_NOP7 P6_NOP8); | 125 | P6_NOP7 P6_NOP8 |
126 | "\t.previous"); | ||
123 | extern const unsigned char p6nops[]; | 127 | extern const unsigned char p6nops[]; |
124 | static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | 128 | static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { |
125 | NULL, | 129 | NULL, |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 027e5c003b16..170d2f5523b2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -143,14 +143,6 @@ static void __init check_config(void) | |||
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * If we configured ourselves for a TSC, we'd better have one! | ||
147 | */ | ||
148 | #ifdef CONFIG_X86_TSC | ||
149 | if (!cpu_has_tsc) | ||
150 | panic("Kernel compiled for Pentium+, requires TSC feature!"); | ||
151 | #endif | ||
152 | |||
153 | /* | ||
154 | * If we were told we had a good local APIC, check for buggy Pentia, | 146 | * If we were told we had a good local APIC, check for buggy Pentia, |
155 | * i.e. all B steppings and the C2 stepping of P54C when using their | 147 | * i.e. all B steppings and the C2 stepping of P54C when using their |
156 | * integrated APIC (see 11AP erratum in "Pentium Processor | 148 | * integrated APIC (see 11AP erratum in "Pentium Processor |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index f2b5a621d27b..8a85c93bd62a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -63,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { | |||
63 | */ | 63 | */ |
64 | static int speedstep_smi_ownership (void) | 64 | static int speedstep_smi_ownership (void) |
65 | { | 65 | { |
66 | u32 command, result, magic; | 66 | u32 command, result, magic, dummy; |
67 | u32 function = GET_SPEEDSTEP_OWNER; | 67 | u32 function = GET_SPEEDSTEP_OWNER; |
68 | unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; | 68 | unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; |
69 | 69 | ||
@@ -73,8 +73,11 @@ static int speedstep_smi_ownership (void) | |||
73 | dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); | 73 | dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); |
74 | 74 | ||
75 | __asm__ __volatile__( | 75 | __asm__ __volatile__( |
76 | "push %%ebp\n" | ||
76 | "out %%al, (%%dx)\n" | 77 | "out %%al, (%%dx)\n" |
77 | : "=D" (result) | 78 | "pop %%ebp\n" |
79 | : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), | ||
80 | "=S" (dummy) | ||
78 | : "a" (command), "b" (function), "c" (0), "d" (smi_port), | 81 | : "a" (command), "b" (function), "c" (0), "d" (smi_port), |
79 | "D" (0), "S" (magic) | 82 | "D" (0), "S" (magic) |
80 | : "memory" | 83 | : "memory" |
@@ -96,7 +99,7 @@ static int speedstep_smi_ownership (void) | |||
96 | */ | 99 | */ |
97 | static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) | 100 | static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) |
98 | { | 101 | { |
99 | u32 command, result = 0, edi, high_mhz, low_mhz; | 102 | u32 command, result = 0, edi, high_mhz, low_mhz, dummy; |
100 | u32 state=0; | 103 | u32 state=0; |
101 | u32 function = GET_SPEEDSTEP_FREQS; | 104 | u32 function = GET_SPEEDSTEP_FREQS; |
102 | 105 | ||
@@ -109,10 +112,12 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) | |||
109 | 112 | ||
110 | dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); | 113 | dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); |
111 | 114 | ||
112 | __asm__ __volatile__("movl $0, %%edi\n" | 115 | __asm__ __volatile__( |
116 | "push %%ebp\n" | ||
113 | "out %%al, (%%dx)\n" | 117 | "out %%al, (%%dx)\n" |
114 | : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) | 118 | "pop %%ebp" |
115 | : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) | 119 | : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy) |
120 | : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) | ||
116 | ); | 121 | ); |
117 | 122 | ||
118 | dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); | 123 | dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); |
@@ -135,16 +140,18 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) | |||
135 | static int speedstep_get_state (void) | 140 | static int speedstep_get_state (void) |
136 | { | 141 | { |
137 | u32 function=GET_SPEEDSTEP_STATE; | 142 | u32 function=GET_SPEEDSTEP_STATE; |
138 | u32 result, state, edi, command; | 143 | u32 result, state, edi, command, dummy; |
139 | 144 | ||
140 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | 145 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); |
141 | 146 | ||
142 | dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); | 147 | dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); |
143 | 148 | ||
144 | __asm__ __volatile__("movl $0, %%edi\n" | 149 | __asm__ __volatile__( |
150 | "push %%ebp\n" | ||
145 | "out %%al, (%%dx)\n" | 151 | "out %%al, (%%dx)\n" |
146 | : "=a" (result), "=b" (state), "=D" (edi) | 152 | "pop %%ebp\n" |
147 | : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) | 153 | : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy) |
154 | : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0) | ||
148 | ); | 155 | ); |
149 | 156 | ||
150 | dprintk("state is %x, result is %x\n", state, result); | 157 | dprintk("state is %x, result is %x\n", state, result); |
@@ -160,7 +167,7 @@ static int speedstep_get_state (void) | |||
160 | */ | 167 | */ |
161 | static void speedstep_set_state (unsigned int state) | 168 | static void speedstep_set_state (unsigned int state) |
162 | { | 169 | { |
163 | unsigned int result = 0, command, new_state; | 170 | unsigned int result = 0, command, new_state, dummy; |
164 | unsigned long flags; | 171 | unsigned long flags; |
165 | unsigned int function=SET_SPEEDSTEP_STATE; | 172 | unsigned int function=SET_SPEEDSTEP_STATE; |
166 | unsigned int retry = 0; | 173 | unsigned int retry = 0; |
@@ -182,10 +189,12 @@ static void speedstep_set_state (unsigned int state) | |||
182 | } | 189 | } |
183 | retry++; | 190 | retry++; |
184 | __asm__ __volatile__( | 191 | __asm__ __volatile__( |
185 | "movl $0, %%edi\n" | 192 | "push %%ebp\n" |
186 | "out %%al, (%%dx)\n" | 193 | "out %%al, (%%dx)\n" |
187 | : "=b" (new_state), "=D" (result) | 194 | "pop %%ebp" |
188 | : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) | 195 | : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy), |
196 | "=d" (dummy), "=S" (dummy) | ||
197 | : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) | ||
189 | ); | 198 | ); |
190 | } while ((new_state != state) && (retry <= SMI_TRIES)); | 199 | } while ((new_state != state) && (retry <= SMI_TRIES)); |
191 | 200 | ||
@@ -195,7 +204,7 @@ static void speedstep_set_state (unsigned int state) | |||
195 | if (new_state == state) { | 204 | if (new_state == state) { |
196 | dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); | 205 | dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); |
197 | } else { | 206 | } else { |
198 | printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); | 207 | printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result); |
199 | } | 208 | } |
200 | 209 | ||
201 | return; | 210 | return; |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 103d61a59b19..3e18db4cefee 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -176,12 +176,13 @@ static inline void k8_enable_fixed_iorrs(void) | |||
176 | } | 176 | } |
177 | 177 | ||
178 | /** | 178 | /** |
179 | * Checks and updates an fixed-range MTRR if it differs from the value it | 179 | * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have |
180 | * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also. | 180 | * @msr: MSR address of the MTTR which should be checked and updated |
181 | * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information | 181 | * @changed: pointer which indicates whether the MTRR needed to be changed |
182 | * \param msr MSR address of the MTTR which should be checked and updated | 182 | * @msrwords: pointer to the MSR values which the MSR should have |
183 | * \param changed pointer which indicates whether the MTRR needed to be changed | 183 | * |
184 | * \param msrwords pointer to the MSR values which the MSR should have | 184 | * If K8 extentions are wanted, update the K8 SYSCFG MSR also. |
185 | * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information. | ||
185 | */ | 186 | */ |
186 | static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) | 187 | static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) |
187 | { | 188 | { |
@@ -199,12 +200,15 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) | |||
199 | } | 200 | } |
200 | } | 201 | } |
201 | 202 | ||
203 | /** | ||
204 | * generic_get_free_region - Get a free MTRR. | ||
205 | * @base: The starting (base) address of the region. | ||
206 | * @size: The size (in bytes) of the region. | ||
207 | * @replace_reg: mtrr index to be replaced; set to invalid value if none. | ||
208 | * | ||
209 | * Returns: The index of the region on success, else negative on error. | ||
210 | */ | ||
202 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 211 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) |
203 | /* [SUMMARY] Get a free MTRR. | ||
204 | <base> The starting (base) address of the region. | ||
205 | <size> The size (in bytes) of the region. | ||
206 | [RETURNS] The index of the region on success, else -1 on error. | ||
207 | */ | ||
208 | { | 212 | { |
209 | int i, max; | 213 | int i, max; |
210 | mtrr_type ltype; | 214 | mtrr_type ltype; |
@@ -249,8 +253,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
249 | } | 253 | } |
250 | 254 | ||
251 | /** | 255 | /** |
252 | * Checks and updates the fixed-range MTRRs if they differ from the saved set | 256 | * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set |
253 | * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() | 257 | * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() |
254 | */ | 258 | */ |
255 | static int set_fixed_ranges(mtrr_type * frs) | 259 | static int set_fixed_ranges(mtrr_type * frs) |
256 | { | 260 | { |
@@ -294,13 +298,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | |||
294 | 298 | ||
295 | static u32 deftype_lo, deftype_hi; | 299 | static u32 deftype_lo, deftype_hi; |
296 | 300 | ||
301 | /** | ||
302 | * set_mtrr_state - Set the MTRR state for this CPU. | ||
303 | * | ||
304 | * NOTE: The CPU must already be in a safe state for MTRR changes. | ||
305 | * RETURNS: 0 if no changes made, else a mask indicating what was changed. | ||
306 | */ | ||
297 | static unsigned long set_mtrr_state(void) | 307 | static unsigned long set_mtrr_state(void) |
298 | /* [SUMMARY] Set the MTRR state for this CPU. | ||
299 | <state> The MTRR state information to read. | ||
300 | <ctxt> Some relevant CPU context. | ||
301 | [NOTE] The CPU must already be in a safe state for MTRR changes. | ||
302 | [RETURNS] 0 if no changes made, else a mask indication what was changed. | ||
303 | */ | ||
304 | { | 308 | { |
305 | unsigned int i; | 309 | unsigned int i; |
306 | unsigned long change_mask = 0; | 310 | unsigned long change_mask = 0; |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 9b838324b818..b943e10ad814 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -652,9 +652,6 @@ static void probe_nmi_watchdog(void) | |||
652 | wd_ops = &p6_wd_ops; | 652 | wd_ops = &p6_wd_ops; |
653 | break; | 653 | break; |
654 | case 15: | 654 | case 15: |
655 | if (boot_cpu_data.x86_model > 0x4) | ||
656 | return; | ||
657 | |||
658 | wd_ops = &p4_wd_ops; | 655 | wd_ops = &p4_wd_ops; |
659 | break; | 656 | break; |
660 | default: | 657 | default: |
@@ -670,8 +667,10 @@ int lapic_watchdog_init(unsigned nmi_hz) | |||
670 | { | 667 | { |
671 | if (!wd_ops) { | 668 | if (!wd_ops) { |
672 | probe_nmi_watchdog(); | 669 | probe_nmi_watchdog(); |
673 | if (!wd_ops) | 670 | if (!wd_ops) { |
671 | printk(KERN_INFO "NMI watchdog: CPU not supported\n"); | ||
674 | return -1; | 672 | return -1; |
673 | } | ||
675 | 674 | ||
676 | if (!wd_ops->reserve()) { | 675 | if (!wd_ops->reserve()) { |
677 | printk(KERN_ERR | 676 | printk(KERN_ERR |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 235fd6c77504..36652ea1a265 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -133,13 +133,16 @@ static void hpet_reserve_platform_timers(unsigned long id) | |||
133 | #ifdef CONFIG_HPET_EMULATE_RTC | 133 | #ifdef CONFIG_HPET_EMULATE_RTC |
134 | hpet_reserve_timer(&hd, 1); | 134 | hpet_reserve_timer(&hd, 1); |
135 | #endif | 135 | #endif |
136 | |||
136 | hd.hd_irq[0] = HPET_LEGACY_8254; | 137 | hd.hd_irq[0] = HPET_LEGACY_8254; |
137 | hd.hd_irq[1] = HPET_LEGACY_RTC; | 138 | hd.hd_irq[1] = HPET_LEGACY_RTC; |
138 | 139 | ||
139 | for (i = 2; i < nrtimers; timer++, i++) | 140 | for (i = 2; i < nrtimers; timer++, i++) |
140 | hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> | 141 | hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> |
141 | Tn_INT_ROUTE_CNF_SHIFT; | 142 | Tn_INT_ROUTE_CNF_SHIFT; |
143 | |||
142 | hpet_alloc(&hd); | 144 | hpet_alloc(&hd); |
145 | |||
143 | } | 146 | } |
144 | #else | 147 | #else |
145 | static void hpet_reserve_platform_timers(unsigned long id) { } | 148 | static void hpet_reserve_platform_timers(unsigned long id) { } |
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index c706a3061553..5921e5f0a640 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c | |||
@@ -78,6 +78,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { | |||
78 | }, | 78 | }, |
79 | { | 79 | { |
80 | .callback = dmi_io_delay_0xed_port, | 80 | .callback = dmi_io_delay_0xed_port, |
81 | .ident = "HP Pavilion dv6000", | ||
82 | .matches = { | ||
83 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
84 | DMI_MATCH(DMI_BOARD_NAME, "30B8") | ||
85 | } | ||
86 | }, | ||
87 | { | ||
88 | .callback = dmi_io_delay_0xed_port, | ||
81 | .ident = "HP Pavilion tx1000", | 89 | .ident = "HP Pavilion tx1000", |
82 | .matches = { | 90 | .matches = { |
83 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | 91 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 236d2f8f7ddc..576a03db4511 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -233,6 +233,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
233 | 233 | ||
234 | void arch_crash_save_vmcoreinfo(void) | 234 | void arch_crash_save_vmcoreinfo(void) |
235 | { | 235 | { |
236 | VMCOREINFO_SYMBOL(phys_base); | ||
236 | VMCOREINFO_SYMBOL(init_level4_pgt); | 237 | VMCOREINFO_SYMBOL(init_level4_pgt); |
237 | 238 | ||
238 | #ifdef CONFIG_NUMA | 239 | #ifdef CONFIG_NUMA |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 027fc067b399..b402c0f3f192 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -30,6 +30,7 @@ | |||
30 | 30 | ||
31 | #include <linux/kernel.h> | 31 | #include <linux/kernel.h> |
32 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
33 | #include <linux/module.h> | ||
33 | #include <asm/geode.h> | 34 | #include <asm/geode.h> |
34 | 35 | ||
35 | static struct mfgpt_timer_t { | 36 | static struct mfgpt_timer_t { |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index faf3229f8fb3..700e4647dd30 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -615,8 +615,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
615 | 615 | ||
616 | nommu: | 616 | nommu: |
617 | /* Should not happen anymore */ | 617 | /* Should not happen anymore */ |
618 | printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | 618 | printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" |
619 | KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); | 619 | KERN_WARNING "falling back to iommu=soft.\n"); |
620 | return -1; | 620 | return -1; |
621 | } | 621 | } |
622 | 622 | ||
@@ -692,9 +692,9 @@ void __init gart_iommu_init(void) | |||
692 | !gart_iommu_aperture || | 692 | !gart_iommu_aperture || |
693 | (no_agp && init_k8_gatt(&info) < 0)) { | 693 | (no_agp && init_k8_gatt(&info) < 0)) { |
694 | if (end_pfn > MAX_DMA32_PFN) { | 694 | if (end_pfn > MAX_DMA32_PFN) { |
695 | printk(KERN_ERR "WARNING more than 4GB of memory " | 695 | printk(KERN_WARNING "More than 4GB of memory " |
696 | "but GART IOMMU not available.\n" | 696 | "but GART IOMMU not available.\n" |
697 | KERN_ERR "WARNING 32bit PCI may malfunction.\n"); | 697 | KERN_WARNING "falling back to iommu=soft.\n"); |
698 | } | 698 | } |
699 | return; | 699 | return; |
700 | } | 700 | } |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index be3c7a299f02..43930e73f657 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -82,7 +82,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) | |||
82 | */ | 82 | */ |
83 | void (*pm_idle)(void); | 83 | void (*pm_idle)(void); |
84 | EXPORT_SYMBOL(pm_idle); | 84 | EXPORT_SYMBOL(pm_idle); |
85 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | ||
86 | 85 | ||
87 | void disable_hlt(void) | 86 | void disable_hlt(void) |
88 | { | 87 | { |
@@ -190,9 +189,6 @@ void cpu_idle(void) | |||
190 | while (!need_resched()) { | 189 | while (!need_resched()) { |
191 | void (*idle)(void); | 190 | void (*idle)(void); |
192 | 191 | ||
193 | if (__get_cpu_var(cpu_idle_state)) | ||
194 | __get_cpu_var(cpu_idle_state) = 0; | ||
195 | |||
196 | check_pgt_cache(); | 192 | check_pgt_cache(); |
197 | rmb(); | 193 | rmb(); |
198 | idle = pm_idle; | 194 | idle = pm_idle; |
@@ -220,40 +216,19 @@ static void do_nothing(void *unused) | |||
220 | { | 216 | { |
221 | } | 217 | } |
222 | 218 | ||
219 | /* | ||
220 | * cpu_idle_wait - Used to ensure that all the CPUs discard old value of | ||
221 | * pm_idle and update to new pm_idle value. Required while changing pm_idle | ||
222 | * handler on SMP systems. | ||
223 | * | ||
224 | * Caller must have changed pm_idle to the new value before the call. Old | ||
225 | * pm_idle value will not be used by any CPU after the return of this function. | ||
226 | */ | ||
223 | void cpu_idle_wait(void) | 227 | void cpu_idle_wait(void) |
224 | { | 228 | { |
225 | unsigned int cpu, this_cpu = get_cpu(); | 229 | smp_mb(); |
226 | cpumask_t map, tmp = current->cpus_allowed; | 230 | /* kick all the CPUs so that they exit out of pm_idle */ |
227 | 231 | smp_call_function(do_nothing, NULL, 0, 1); | |
228 | set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); | ||
229 | put_cpu(); | ||
230 | |||
231 | cpus_clear(map); | ||
232 | for_each_online_cpu(cpu) { | ||
233 | per_cpu(cpu_idle_state, cpu) = 1; | ||
234 | cpu_set(cpu, map); | ||
235 | } | ||
236 | |||
237 | __get_cpu_var(cpu_idle_state) = 0; | ||
238 | |||
239 | wmb(); | ||
240 | do { | ||
241 | ssleep(1); | ||
242 | for_each_online_cpu(cpu) { | ||
243 | if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) | ||
244 | cpu_clear(cpu, map); | ||
245 | } | ||
246 | cpus_and(map, map, cpu_online_map); | ||
247 | /* | ||
248 | * We waited 1 sec, if a CPU still did not call idle | ||
249 | * it may be because it is in idle and not waking up | ||
250 | * because it has nothing to do. | ||
251 | * Give all the remaining CPUS a kick. | ||
252 | */ | ||
253 | smp_call_function_mask(map, do_nothing, NULL, 0); | ||
254 | } while (!cpus_empty(map)); | ||
255 | |||
256 | set_cpus_allowed(current, tmp); | ||
257 | } | 232 | } |
258 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | 233 | EXPORT_SYMBOL_GPL(cpu_idle_wait); |
259 | 234 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3baf9b9f4c87..46c4c546b499 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -63,7 +63,6 @@ EXPORT_SYMBOL(boot_option_idle_override); | |||
63 | */ | 63 | */ |
64 | void (*pm_idle)(void); | 64 | void (*pm_idle)(void); |
65 | EXPORT_SYMBOL(pm_idle); | 65 | EXPORT_SYMBOL(pm_idle); |
66 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | ||
67 | 66 | ||
68 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | 67 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); |
69 | 68 | ||
@@ -173,9 +172,6 @@ void cpu_idle(void) | |||
173 | while (!need_resched()) { | 172 | while (!need_resched()) { |
174 | void (*idle)(void); | 173 | void (*idle)(void); |
175 | 174 | ||
176 | if (__get_cpu_var(cpu_idle_state)) | ||
177 | __get_cpu_var(cpu_idle_state) = 0; | ||
178 | |||
179 | rmb(); | 175 | rmb(); |
180 | idle = pm_idle; | 176 | idle = pm_idle; |
181 | if (!idle) | 177 | if (!idle) |
@@ -207,40 +203,19 @@ static void do_nothing(void *unused) | |||
207 | { | 203 | { |
208 | } | 204 | } |
209 | 205 | ||
206 | /* | ||
207 | * cpu_idle_wait - Used to ensure that all the CPUs discard old value of | ||
208 | * pm_idle and update to new pm_idle value. Required while changing pm_idle | ||
209 | * handler on SMP systems. | ||
210 | * | ||
211 | * Caller must have changed pm_idle to the new value before the call. Old | ||
212 | * pm_idle value will not be used by any CPU after the return of this function. | ||
213 | */ | ||
210 | void cpu_idle_wait(void) | 214 | void cpu_idle_wait(void) |
211 | { | 215 | { |
212 | unsigned int cpu, this_cpu = get_cpu(); | 216 | smp_mb(); |
213 | cpumask_t map, tmp = current->cpus_allowed; | 217 | /* kick all the CPUs so that they exit out of pm_idle */ |
214 | 218 | smp_call_function(do_nothing, NULL, 0, 1); | |
215 | set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); | ||
216 | put_cpu(); | ||
217 | |||
218 | cpus_clear(map); | ||
219 | for_each_online_cpu(cpu) { | ||
220 | per_cpu(cpu_idle_state, cpu) = 1; | ||
221 | cpu_set(cpu, map); | ||
222 | } | ||
223 | |||
224 | __get_cpu_var(cpu_idle_state) = 0; | ||
225 | |||
226 | wmb(); | ||
227 | do { | ||
228 | ssleep(1); | ||
229 | for_each_online_cpu(cpu) { | ||
230 | if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) | ||
231 | cpu_clear(cpu, map); | ||
232 | } | ||
233 | cpus_and(map, map, cpu_online_map); | ||
234 | /* | ||
235 | * We waited 1 sec, if a CPU still did not call idle | ||
236 | * it may be because it is in idle and not waking up | ||
237 | * because it has nothing to do. | ||
238 | * Give all the remaining CPUS a kick. | ||
239 | */ | ||
240 | smp_call_function_mask(map, do_nothing, 0, 0); | ||
241 | } while (!cpus_empty(map)); | ||
242 | |||
243 | set_cpus_allowed(current, tmp); | ||
244 | } | 219 | } |
245 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | 220 | EXPORT_SYMBOL_GPL(cpu_idle_wait); |
246 | 221 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d5904eef1d31..eb92ccbb3502 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -600,21 +600,6 @@ static int ptrace_bts_read_record(struct task_struct *child, | |||
600 | return sizeof(ret); | 600 | return sizeof(ret); |
601 | } | 601 | } |
602 | 602 | ||
603 | static int ptrace_bts_write_record(struct task_struct *child, | ||
604 | const struct bts_struct *in) | ||
605 | { | ||
606 | int retval; | ||
607 | |||
608 | if (!child->thread.ds_area_msr) | ||
609 | return -ENXIO; | ||
610 | |||
611 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | ||
612 | if (retval) | ||
613 | return retval; | ||
614 | |||
615 | return sizeof(*in); | ||
616 | } | ||
617 | |||
618 | static int ptrace_bts_clear(struct task_struct *child) | 603 | static int ptrace_bts_clear(struct task_struct *child) |
619 | { | 604 | { |
620 | if (!child->thread.ds_area_msr) | 605 | if (!child->thread.ds_area_msr) |
@@ -657,75 +642,6 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
657 | return end; | 642 | return end; |
658 | } | 643 | } |
659 | 644 | ||
660 | static int ptrace_bts_realloc(struct task_struct *child, | ||
661 | int size, int reduce_size) | ||
662 | { | ||
663 | unsigned long rlim, vm; | ||
664 | int ret, old_size; | ||
665 | |||
666 | if (size < 0) | ||
667 | return -EINVAL; | ||
668 | |||
669 | old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
670 | if (old_size < 0) | ||
671 | return old_size; | ||
672 | |||
673 | ret = ds_free((void **)&child->thread.ds_area_msr); | ||
674 | if (ret < 0) | ||
675 | goto out; | ||
676 | |||
677 | size >>= PAGE_SHIFT; | ||
678 | old_size >>= PAGE_SHIFT; | ||
679 | |||
680 | current->mm->total_vm -= old_size; | ||
681 | current->mm->locked_vm -= old_size; | ||
682 | |||
683 | if (size == 0) | ||
684 | goto out; | ||
685 | |||
686 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
687 | vm = current->mm->total_vm + size; | ||
688 | if (rlim < vm) { | ||
689 | ret = -ENOMEM; | ||
690 | |||
691 | if (!reduce_size) | ||
692 | goto out; | ||
693 | |||
694 | size = rlim - current->mm->total_vm; | ||
695 | if (size <= 0) | ||
696 | goto out; | ||
697 | } | ||
698 | |||
699 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
700 | vm = current->mm->locked_vm + size; | ||
701 | if (rlim < vm) { | ||
702 | ret = -ENOMEM; | ||
703 | |||
704 | if (!reduce_size) | ||
705 | goto out; | ||
706 | |||
707 | size = rlim - current->mm->locked_vm; | ||
708 | if (size <= 0) | ||
709 | goto out; | ||
710 | } | ||
711 | |||
712 | ret = ds_allocate((void **)&child->thread.ds_area_msr, | ||
713 | size << PAGE_SHIFT); | ||
714 | if (ret < 0) | ||
715 | goto out; | ||
716 | |||
717 | current->mm->total_vm += size; | ||
718 | current->mm->locked_vm += size; | ||
719 | |||
720 | out: | ||
721 | if (child->thread.ds_area_msr) | ||
722 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
723 | else | ||
724 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
725 | |||
726 | return ret; | ||
727 | } | ||
728 | |||
729 | static int ptrace_bts_config(struct task_struct *child, | 645 | static int ptrace_bts_config(struct task_struct *child, |
730 | long cfg_size, | 646 | long cfg_size, |
731 | const struct ptrace_bts_config __user *ucfg) | 647 | const struct ptrace_bts_config __user *ucfg) |
@@ -828,6 +744,91 @@ static int ptrace_bts_status(struct task_struct *child, | |||
828 | return sizeof(cfg); | 744 | return sizeof(cfg); |
829 | } | 745 | } |
830 | 746 | ||
747 | |||
748 | static int ptrace_bts_write_record(struct task_struct *child, | ||
749 | const struct bts_struct *in) | ||
750 | { | ||
751 | int retval; | ||
752 | |||
753 | if (!child->thread.ds_area_msr) | ||
754 | return -ENXIO; | ||
755 | |||
756 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | ||
757 | if (retval) | ||
758 | return retval; | ||
759 | |||
760 | return sizeof(*in); | ||
761 | } | ||
762 | |||
763 | static int ptrace_bts_realloc(struct task_struct *child, | ||
764 | int size, int reduce_size) | ||
765 | { | ||
766 | unsigned long rlim, vm; | ||
767 | int ret, old_size; | ||
768 | |||
769 | if (size < 0) | ||
770 | return -EINVAL; | ||
771 | |||
772 | old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
773 | if (old_size < 0) | ||
774 | return old_size; | ||
775 | |||
776 | ret = ds_free((void **)&child->thread.ds_area_msr); | ||
777 | if (ret < 0) | ||
778 | goto out; | ||
779 | |||
780 | size >>= PAGE_SHIFT; | ||
781 | old_size >>= PAGE_SHIFT; | ||
782 | |||
783 | current->mm->total_vm -= old_size; | ||
784 | current->mm->locked_vm -= old_size; | ||
785 | |||
786 | if (size == 0) | ||
787 | goto out; | ||
788 | |||
789 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
790 | vm = current->mm->total_vm + size; | ||
791 | if (rlim < vm) { | ||
792 | ret = -ENOMEM; | ||
793 | |||
794 | if (!reduce_size) | ||
795 | goto out; | ||
796 | |||
797 | size = rlim - current->mm->total_vm; | ||
798 | if (size <= 0) | ||
799 | goto out; | ||
800 | } | ||
801 | |||
802 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
803 | vm = current->mm->locked_vm + size; | ||
804 | if (rlim < vm) { | ||
805 | ret = -ENOMEM; | ||
806 | |||
807 | if (!reduce_size) | ||
808 | goto out; | ||
809 | |||
810 | size = rlim - current->mm->locked_vm; | ||
811 | if (size <= 0) | ||
812 | goto out; | ||
813 | } | ||
814 | |||
815 | ret = ds_allocate((void **)&child->thread.ds_area_msr, | ||
816 | size << PAGE_SHIFT); | ||
817 | if (ret < 0) | ||
818 | goto out; | ||
819 | |||
820 | current->mm->total_vm += size; | ||
821 | current->mm->locked_vm += size; | ||
822 | |||
823 | out: | ||
824 | if (child->thread.ds_area_msr) | ||
825 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
826 | else | ||
827 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
828 | |||
829 | return ret; | ||
830 | } | ||
831 | |||
831 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 832 | void ptrace_bts_take_timestamp(struct task_struct *tsk, |
832 | enum bts_qualifier qualifier) | 833 | enum bts_qualifier qualifier) |
833 | { | 834 | { |
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index a1d7071a51c9..2b3e5d45176b 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -406,8 +406,6 @@ static unsigned long __init setup_memory(void) | |||
406 | */ | 406 | */ |
407 | min_low_pfn = PFN_UP(init_pg_tables_end); | 407 | min_low_pfn = PFN_UP(init_pg_tables_end); |
408 | 408 | ||
409 | find_max_pfn(); | ||
410 | |||
411 | max_low_pfn = find_max_low_pfn(); | 409 | max_low_pfn = find_max_low_pfn(); |
412 | 410 | ||
413 | #ifdef CONFIG_HIGHMEM | 411 | #ifdef CONFIG_HIGHMEM |
@@ -764,12 +762,13 @@ void __init setup_arch(char **cmdline_p) | |||
764 | if (efi_enabled) | 762 | if (efi_enabled) |
765 | efi_init(); | 763 | efi_init(); |
766 | 764 | ||
767 | max_low_pfn = setup_memory(); | ||
768 | |||
769 | /* update e820 for memory not covered by WB MTRRs */ | 765 | /* update e820 for memory not covered by WB MTRRs */ |
766 | find_max_pfn(); | ||
770 | mtrr_bp_init(); | 767 | mtrr_bp_init(); |
771 | if (mtrr_trim_uncached_memory(max_pfn)) | 768 | if (mtrr_trim_uncached_memory(max_pfn)) |
772 | max_low_pfn = setup_memory(); | 769 | find_max_pfn(); |
770 | |||
771 | max_low_pfn = setup_memory(); | ||
773 | 772 | ||
774 | #ifdef CONFIG_VMI | 773 | #ifdef CONFIG_VMI |
775 | /* | 774 | /* |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 7637dc91c79b..f4f7ecfb898c 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -801,7 +801,7 @@ static void __cpuinit srat_detect_node(void) | |||
801 | /* Don't do the funky fallback heuristics the AMD version employs | 801 | /* Don't do the funky fallback heuristics the AMD version employs |
802 | for now. */ | 802 | for now. */ |
803 | node = apicid_to_node[apicid]; | 803 | node = apicid_to_node[apicid]; |
804 | if (node == NUMA_NO_NODE) | 804 | if (node == NUMA_NO_NODE || !node_online(node)) |
805 | node = first_node(node_online_map); | 805 | node = first_node(node_online_map); |
806 | numa_set_node(cpu, node); | 806 | numa_set_node(cpu, node); |
807 | 807 | ||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 9d406cdc847f..071ff4798236 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -140,6 +140,9 @@ static int enable_single_step(struct task_struct *child) | |||
140 | */ | 140 | */ |
141 | static void write_debugctlmsr(struct task_struct *child, unsigned long val) | 141 | static void write_debugctlmsr(struct task_struct *child, unsigned long val) |
142 | { | 142 | { |
143 | if (child->thread.debugctlmsr == val) | ||
144 | return; | ||
145 | |||
143 | child->thread.debugctlmsr = val; | 146 | child->thread.debugctlmsr = val; |
144 | 147 | ||
145 | if (child != current) | 148 | if (child != current) |
@@ -165,11 +168,11 @@ static void enable_step(struct task_struct *child, bool block) | |||
165 | write_debugctlmsr(child, | 168 | write_debugctlmsr(child, |
166 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); | 169 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); |
167 | } else { | 170 | } else { |
168 | write_debugctlmsr(child, | 171 | write_debugctlmsr(child, |
169 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 172 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); |
170 | 173 | ||
171 | if (!child->thread.debugctlmsr) | 174 | if (!child->thread.debugctlmsr) |
172 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 175 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
173 | } | 176 | } |
174 | } | 177 | } |
175 | 178 | ||
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 022bcaa3b42e..ab6bf375a307 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c | |||
@@ -92,7 +92,7 @@ int do_set_thread_area(struct task_struct *p, int idx, | |||
92 | asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) | 92 | asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) |
93 | { | 93 | { |
94 | int ret = do_set_thread_area(current, -1, u_info, 1); | 94 | int ret = do_set_thread_area(current, -1, u_info, 1); |
95 | prevent_tail_call(ret); | 95 | asmlinkage_protect(1, ret, u_info); |
96 | return ret; | 96 | return ret; |
97 | } | 97 | } |
98 | 98 | ||
@@ -142,7 +142,7 @@ int do_get_thread_area(struct task_struct *p, int idx, | |||
142 | asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) | 142 | asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) |
143 | { | 143 | { |
144 | int ret = do_get_thread_area(current, -1, u_info); | 144 | int ret = do_get_thread_area(current, -1, u_info); |
145 | prevent_tail_call(ret); | 145 | asmlinkage_protect(1, ret, u_info); |
146 | return ret; | 146 | return ret; |
147 | } | 147 | } |
148 | 148 | ||
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index f14cfd9d1f94..c2241e04ea5f 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c | |||
@@ -256,9 +256,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | |||
256 | ref_freq, freq->new); | 256 | ref_freq, freq->new); |
257 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | 257 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { |
258 | tsc_khz = cpu_khz; | 258 | tsc_khz = cpu_khz; |
259 | preempt_disable(); | 259 | set_cyc2ns_scale(cpu_khz, freq->cpu); |
260 | set_cyc2ns_scale(cpu_khz, smp_processor_id()); | ||
261 | preempt_enable(); | ||
262 | /* | 260 | /* |
263 | * TSC based sched_clock turns | 261 | * TSC based sched_clock turns |
264 | * to junk w/ cpufreq | 262 | * to junk w/ cpufreq |
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 947554ddabb6..d3bebaaad842 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c | |||
@@ -148,9 +148,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
148 | mark_tsc_unstable("cpufreq changes"); | 148 | mark_tsc_unstable("cpufreq changes"); |
149 | } | 149 | } |
150 | 150 | ||
151 | preempt_disable(); | 151 | set_cyc2ns_scale(tsc_khz_ref, freq->cpu); |
152 | set_cyc2ns_scale(tsc_khz_ref, smp_processor_id()); | ||
153 | preempt_enable(); | ||
154 | 152 | ||
155 | return 0; | 153 | return 0; |
156 | } | 154 | } |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a104c532ff70..3335b4595efd 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -10,21 +10,19 @@ | |||
10 | * (such as the example in Documentation/lguest/lguest.c) is called the | 10 | * (such as the example in Documentation/lguest/lguest.c) is called the |
11 | * Launcher. | 11 | * Launcher. |
12 | * | 12 | * |
13 | * Secondly, we only run specially modified Guests, not normal kernels. When | 13 | * Secondly, we only run specially modified Guests, not normal kernels: setting |
14 | * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets | 14 | * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows |
15 | * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows | 15 | * how to be a Guest at boot time. This means that you can use the same kernel |
16 | * how to be a Guest. This means that you can use the same kernel you boot | 16 | * you boot normally (ie. as a Host) as a Guest. |
17 | * normally (ie. as a Host) as a Guest. | ||
18 | * | 17 | * |
19 | * These Guests know that they cannot do privileged operations, such as disable | 18 | * These Guests know that they cannot do privileged operations, such as disable |
20 | * interrupts, and that they have to ask the Host to do such things explicitly. | 19 | * interrupts, and that they have to ask the Host to do such things explicitly. |
21 | * This file consists of all the replacements for such low-level native | 20 | * This file consists of all the replacements for such low-level native |
22 | * hardware operations: these special Guest versions call the Host. | 21 | * hardware operations: these special Guest versions call the Host. |
23 | * | 22 | * |
24 | * So how does the kernel know it's a Guest? The Guest starts at a special | 23 | * So how does the kernel know it's a Guest? We'll see that later, but let's |
25 | * entry point marked with a magic string, which sets up a few things then | 24 | * just say that we end up here where we replace the native functions various |
26 | * calls here. We replace the native functions various "paravirt" structures | 25 | * "paravirt" structures with our Guest versions, then boot like normal. :*/ |
27 | * with our Guest versions, then boot like normal. :*/ | ||
28 | 26 | ||
29 | /* | 27 | /* |
30 | * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. | 28 | * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. |
@@ -134,7 +132,7 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
134 | * lguest_leave_lazy_mode(). | 132 | * lguest_leave_lazy_mode(). |
135 | * | 133 | * |
136 | * So, when we're in lazy mode, we call async_hcall() to store the call for | 134 | * So, when we're in lazy mode, we call async_hcall() to store the call for |
137 | * future processing. */ | 135 | * future processing: */ |
138 | static void lazy_hcall(unsigned long call, | 136 | static void lazy_hcall(unsigned long call, |
139 | unsigned long arg1, | 137 | unsigned long arg1, |
140 | unsigned long arg2, | 138 | unsigned long arg2, |
@@ -147,7 +145,7 @@ static void lazy_hcall(unsigned long call, | |||
147 | } | 145 | } |
148 | 146 | ||
149 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then | 147 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then |
150 | * issue a hypercall to flush any stored calls. */ | 148 | * issue the do-nothing hypercall to flush any stored calls. */ |
151 | static void lguest_leave_lazy_mode(void) | 149 | static void lguest_leave_lazy_mode(void) |
152 | { | 150 | { |
153 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | 151 | paravirt_leave_lazy(paravirt_get_lazy_mode()); |
@@ -164,7 +162,7 @@ static void lguest_leave_lazy_mode(void) | |||
164 | * | 162 | * |
165 | * So instead we keep an "irq_enabled" field inside our "struct lguest_data", | 163 | * So instead we keep an "irq_enabled" field inside our "struct lguest_data", |
166 | * which the Guest can update with a single instruction. The Host knows to | 164 | * which the Guest can update with a single instruction. The Host knows to |
167 | * check there when it wants to deliver an interrupt. | 165 | * check there before it tries to deliver an interrupt. |
168 | */ | 166 | */ |
169 | 167 | ||
170 | /* save_flags() is expected to return the processor state (ie. "flags"). The | 168 | /* save_flags() is expected to return the processor state (ie. "flags"). The |
@@ -196,10 +194,15 @@ static void irq_enable(void) | |||
196 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable | 194 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable |
197 | * them (or when we unmask an interrupt). This seems to work for the moment, | 195 | * them (or when we unmask an interrupt). This seems to work for the moment, |
198 | * since interrupts are rare and we'll just get the interrupt on the next timer | 196 | * since interrupts are rare and we'll just get the interrupt on the next timer |
199 | * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way | 197 | * tick, but now we can run with CONFIG_NO_HZ, we should revisit this. One way |
200 | * would be to put the "irq_enabled" field in a page by itself, and have the | 198 | * would be to put the "irq_enabled" field in a page by itself, and have the |
201 | * Host write-protect it when an interrupt comes in when irqs are disabled. | 199 | * Host write-protect it when an interrupt comes in when irqs are disabled. |
202 | * There will then be a page fault as soon as interrupts are re-enabled. :*/ | 200 | * There will then be a page fault as soon as interrupts are re-enabled. |
201 | * | ||
202 | * A better method is to implement soft interrupt disable generally for x86: | ||
203 | * instead of disabling interrupts, we set a flag. If an interrupt does come | ||
204 | * in, we then disable them for real. This is uncommon, so we could simply use | ||
205 | * a hypercall for interrupt control and not worry about efficiency. :*/ | ||
203 | 206 | ||
204 | /*G:034 | 207 | /*G:034 |
205 | * The Interrupt Descriptor Table (IDT). | 208 | * The Interrupt Descriptor Table (IDT). |
@@ -212,6 +215,10 @@ static void irq_enable(void) | |||
212 | static void lguest_write_idt_entry(gate_desc *dt, | 215 | static void lguest_write_idt_entry(gate_desc *dt, |
213 | int entrynum, const gate_desc *g) | 216 | int entrynum, const gate_desc *g) |
214 | { | 217 | { |
218 | /* The gate_desc structure is 8 bytes long: we hand it to the Host in | ||
219 | * two 32-bit chunks. The whole 32-bit kernel used to hand descriptors | ||
220 | * around like this; typesafety wasn't a big concern in Linux's early | ||
221 | * years. */ | ||
215 | u32 *desc = (u32 *)g; | 222 | u32 *desc = (u32 *)g; |
216 | /* Keep the local copy up to date. */ | 223 | /* Keep the local copy up to date. */ |
217 | native_write_idt_entry(dt, entrynum, g); | 224 | native_write_idt_entry(dt, entrynum, g); |
@@ -243,7 +250,8 @@ static void lguest_load_idt(const struct desc_ptr *desc) | |||
243 | * | 250 | * |
244 | * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY | 251 | * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY |
245 | * hypercall and use that repeatedly to load a new IDT. I don't think it | 252 | * hypercall and use that repeatedly to load a new IDT. I don't think it |
246 | * really matters, but wouldn't it be nice if they were the same? | 253 | * really matters, but wouldn't it be nice if they were the same? Wouldn't |
254 | * it be even better if you were the one to send the patch to fix it? | ||
247 | */ | 255 | */ |
248 | static void lguest_load_gdt(const struct desc_ptr *desc) | 256 | static void lguest_load_gdt(const struct desc_ptr *desc) |
249 | { | 257 | { |
@@ -298,9 +306,9 @@ static void lguest_load_tr_desc(void) | |||
298 | 306 | ||
299 | /* The "cpuid" instruction is a way of querying both the CPU identity | 307 | /* The "cpuid" instruction is a way of querying both the CPU identity |
300 | * (manufacturer, model, etc) and its features. It was introduced before the | 308 | * (manufacturer, model, etc) and its features. It was introduced before the |
301 | * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you | 309 | * Pentium in 1993 and keeps getting extended by both Intel, AMD and others. |
302 | * might imagine, after a decade and a half this treatment, it is now a giant | 310 | * As you might imagine, after a decade and a half this treatment, it is now a |
303 | * ball of hair. Its entry in the current Intel manual runs to 28 pages. | 311 | * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. |
304 | * | 312 | * |
305 | * This instruction even it has its own Wikipedia entry. The Wikipedia entry | 313 | * This instruction even it has its own Wikipedia entry. The Wikipedia entry |
306 | * has been translated into 4 languages. I am not making this up! | 314 | * has been translated into 4 languages. I am not making this up! |
@@ -594,17 +602,17 @@ static unsigned long lguest_get_wallclock(void) | |||
594 | return lguest_data.time.tv_sec; | 602 | return lguest_data.time.tv_sec; |
595 | } | 603 | } |
596 | 604 | ||
597 | /* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, | 605 | /* The TSC is an Intel thing called the Time Stamp Counter. The Host tells us |
598 | * or 0 if it's unusable as a reliable clock source. This matches what we want | 606 | * what speed it runs at, or 0 if it's unusable as a reliable clock source. |
599 | * here: if we return 0 from this function, the x86 TSC clock will not register | 607 | * This matches what we want here: if we return 0 from this function, the x86 |
600 | * itself. */ | 608 | * TSC clock will give up and not register itself. */ |
601 | static unsigned long lguest_cpu_khz(void) | 609 | static unsigned long lguest_cpu_khz(void) |
602 | { | 610 | { |
603 | return lguest_data.tsc_khz; | 611 | return lguest_data.tsc_khz; |
604 | } | 612 | } |
605 | 613 | ||
606 | /* If we can't use the TSC, the kernel falls back to our "lguest_clock", where | 614 | /* If we can't use the TSC, the kernel falls back to our lower-priority |
607 | * we read the time value given to us by the Host. */ | 615 | * "lguest_clock", where we read the time value given to us by the Host. */ |
608 | static cycle_t lguest_clock_read(void) | 616 | static cycle_t lguest_clock_read(void) |
609 | { | 617 | { |
610 | unsigned long sec, nsec; | 618 | unsigned long sec, nsec; |
@@ -648,12 +656,16 @@ static struct clocksource lguest_clock = { | |||
648 | static int lguest_clockevent_set_next_event(unsigned long delta, | 656 | static int lguest_clockevent_set_next_event(unsigned long delta, |
649 | struct clock_event_device *evt) | 657 | struct clock_event_device *evt) |
650 | { | 658 | { |
659 | /* FIXME: I don't think this can ever happen, but James tells me he had | ||
660 | * to put this code in. Maybe we should remove it now. Anyone? */ | ||
651 | if (delta < LG_CLOCK_MIN_DELTA) { | 661 | if (delta < LG_CLOCK_MIN_DELTA) { |
652 | if (printk_ratelimit()) | 662 | if (printk_ratelimit()) |
653 | printk(KERN_DEBUG "%s: small delta %lu ns\n", | 663 | printk(KERN_DEBUG "%s: small delta %lu ns\n", |
654 | __FUNCTION__, delta); | 664 | __FUNCTION__, delta); |
655 | return -ETIME; | 665 | return -ETIME; |
656 | } | 666 | } |
667 | |||
668 | /* Please wake us this far in the future. */ | ||
657 | hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); | 669 | hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); |
658 | return 0; | 670 | return 0; |
659 | } | 671 | } |
@@ -738,7 +750,7 @@ static void lguest_time_init(void) | |||
738 | * will not tolerate us trying to use that), the stack pointer, and the number | 750 | * will not tolerate us trying to use that), the stack pointer, and the number |
739 | * of pages in the stack. */ | 751 | * of pages in the stack. */ |
740 | static void lguest_load_sp0(struct tss_struct *tss, | 752 | static void lguest_load_sp0(struct tss_struct *tss, |
741 | struct thread_struct *thread) | 753 | struct thread_struct *thread) |
742 | { | 754 | { |
743 | lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0, | 755 | lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0, |
744 | THREAD_SIZE/PAGE_SIZE); | 756 | THREAD_SIZE/PAGE_SIZE); |
@@ -786,9 +798,8 @@ static void lguest_safe_halt(void) | |||
786 | hcall(LHCALL_HALT, 0, 0, 0); | 798 | hcall(LHCALL_HALT, 0, 0, 0); |
787 | } | 799 | } |
788 | 800 | ||
789 | /* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a | 801 | /* The SHUTDOWN hypercall takes a string to describe what's happening, and |
790 | * message out when we're crashing as well as elegant termination like powering | 802 | * an argument which says whether this to restart (reboot) the Guest or not. |
791 | * off. | ||
792 | * | 803 | * |
793 | * Note that the Host always prefers that the Guest speak in physical addresses | 804 | * Note that the Host always prefers that the Guest speak in physical addresses |
794 | * rather than virtual addresses, so we use __pa() here. */ | 805 | * rather than virtual addresses, so we use __pa() here. */ |
@@ -816,8 +827,9 @@ static struct notifier_block paniced = { | |||
816 | /* Setting up memory is fairly easy. */ | 827 | /* Setting up memory is fairly easy. */ |
817 | static __init char *lguest_memory_setup(void) | 828 | static __init char *lguest_memory_setup(void) |
818 | { | 829 | { |
819 | /* We do this here and not earlier because lockcheck barfs if we do it | 830 | /* We do this here and not earlier because lockcheck used to barf if we |
820 | * before start_kernel() */ | 831 | * did it before start_kernel(). I think we fixed that, so it'd be |
832 | * nice to move it back to lguest_init. Patch welcome... */ | ||
821 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); | 833 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); |
822 | 834 | ||
823 | /* The Linux bootloader header contains an "e820" memory map: the | 835 | /* The Linux bootloader header contains an "e820" memory map: the |
@@ -850,12 +862,19 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) | |||
850 | return len; | 862 | return len; |
851 | } | 863 | } |
852 | 864 | ||
865 | /* Rebooting also tells the Host we're finished, but the RESTART flag tells the | ||
866 | * Launcher to reboot us. */ | ||
867 | static void lguest_restart(char *reason) | ||
868 | { | ||
869 | hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); | ||
870 | } | ||
871 | |||
853 | /*G:050 | 872 | /*G:050 |
854 | * Patching (Powerfully Placating Performance Pedants) | 873 | * Patching (Powerfully Placating Performance Pedants) |
855 | * | 874 | * |
856 | * We have already seen that pv_ops structures let us replace simple | 875 | * We have already seen that pv_ops structures let us replace simple native |
857 | * native instructions with calls to the appropriate back end all throughout | 876 | * instructions with calls to the appropriate back end all throughout the |
858 | * the kernel. This allows the same kernel to run as a Guest and as a native | 877 | * kernel. This allows the same kernel to run as a Guest and as a native |
859 | * kernel, but it's slow because of all the indirect branches. | 878 | * kernel, but it's slow because of all the indirect branches. |
860 | * | 879 | * |
861 | * Remember that David Wheeler quote about "Any problem in computer science can | 880 | * Remember that David Wheeler quote about "Any problem in computer science can |
@@ -908,14 +927,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, | |||
908 | return insn_len; | 927 | return insn_len; |
909 | } | 928 | } |
910 | 929 | ||
911 | static void lguest_restart(char *reason) | 930 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The various |
912 | { | 931 | * pv_ops structures in the kernel provide points for (almost) every routine we |
913 | hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); | 932 | * have to override to avoid privileged instructions. */ |
914 | } | ||
915 | |||
916 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops | ||
917 | * structures in the kernel provide points for (almost) every routine we have | ||
918 | * to override to avoid privileged instructions. */ | ||
919 | __init void lguest_init(void) | 933 | __init void lguest_init(void) |
920 | { | 934 | { |
921 | /* We're under lguest, paravirt is enabled, and we're running at | 935 | /* We're under lguest, paravirt is enabled, and we're running at |
@@ -1003,9 +1017,9 @@ __init void lguest_init(void) | |||
1003 | * the normal data segment to get through booting. */ | 1017 | * the normal data segment to get through booting. */ |
1004 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); | 1018 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); |
1005 | 1019 | ||
1006 | /* The Host uses the top of the Guest's virtual address space for the | 1020 | /* The Host<->Guest Switcher lives at the top of our address space, and |
1007 | * Host<->Guest Switcher, and it tells us how big that is in | 1021 | * the Host told us how big it is when we made LGUEST_INIT hypercall: |
1008 | * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */ | 1022 | * it put the answer in lguest_data.reserve_mem */ |
1009 | reserve_top_address(lguest_data.reserve_mem); | 1023 | reserve_top_address(lguest_data.reserve_mem); |
1010 | 1024 | ||
1011 | /* If we don't initialize the lock dependency checker now, it crashes | 1025 | /* If we don't initialize the lock dependency checker now, it crashes |
@@ -1027,6 +1041,7 @@ __init void lguest_init(void) | |||
1027 | /* Math is always hard! */ | 1041 | /* Math is always hard! */ |
1028 | new_cpu_data.hard_math = 1; | 1042 | new_cpu_data.hard_math = 1; |
1029 | 1043 | ||
1044 | /* We don't have features. We have puppies! Puppies! */ | ||
1030 | #ifdef CONFIG_X86_MCE | 1045 | #ifdef CONFIG_X86_MCE |
1031 | mce_disabled = 1; | 1046 | mce_disabled = 1; |
1032 | #endif | 1047 | #endif |
@@ -1044,10 +1059,11 @@ __init void lguest_init(void) | |||
1044 | virtio_cons_early_init(early_put_chars); | 1059 | virtio_cons_early_init(early_put_chars); |
1045 | 1060 | ||
1046 | /* Last of all, we set the power management poweroff hook to point to | 1061 | /* Last of all, we set the power management poweroff hook to point to |
1047 | * the Guest routine to power off. */ | 1062 | * the Guest routine to power off, and the reboot hook to our restart |
1063 | * routine. */ | ||
1048 | pm_power_off = lguest_power_off; | 1064 | pm_power_off = lguest_power_off; |
1049 | |||
1050 | machine_ops.restart = lguest_restart; | 1065 | machine_ops.restart = lguest_restart; |
1066 | |||
1051 | /* Now we're set up, call start_kernel() in init/main.c and we proceed | 1067 | /* Now we're set up, call start_kernel() in init/main.c and we proceed |
1052 | * to boot as normal. It never returns. */ | 1068 | * to boot as normal. It never returns. */ |
1053 | start_kernel(); | 1069 | start_kernel(); |
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 95b6fbcded63..5c7cef34c9e7 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -5,13 +5,20 @@ | |||
5 | #include <asm/thread_info.h> | 5 | #include <asm/thread_info.h> |
6 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | 7 | ||
8 | /*G:020 This is where we begin: head.S notes that the boot header's platform | 8 | /*G:020 Our story starts with the kernel booting into startup_32 in |
9 | * type field is "1" (lguest), so calls us here. | 9 | * arch/x86/kernel/head_32.S. It expects a boot header, which is created by |
10 | * the bootloader (the Launcher in our case). | ||
11 | * | ||
12 | * The startup_32 function does very little: it clears the uninitialized global | ||
13 | * C variables which we expect to be zero (ie. BSS) and then copies the boot | ||
14 | * header and kernel command line somewhere safe. Finally it checks the | ||
15 | * 'hardware_subarch' field. This was introduced in 2.6.24 for lguest and Xen: | ||
16 | * if it's set to '1' (lguest's assigned number), then it calls us here. | ||
10 | * | 17 | * |
11 | * WARNING: be very careful here! We're running at addresses equal to physical | 18 | * WARNING: be very careful here! We're running at addresses equal to physical |
12 | * addesses (around 0), not above PAGE_OFFSET as most code expectes | 19 | * addesses (around 0), not above PAGE_OFFSET as most code expectes |
13 | * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any | 20 | * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any |
14 | * data. | 21 | * data without remembering to subtract __PAGE_OFFSET! |
15 | * | 22 | * |
16 | * The .section line puts this code in .init.text so it will be discarded after | 23 | * The .section line puts this code in .init.text so it will be discarded after |
17 | * boot. */ | 24 | * boot. */ |
@@ -24,7 +31,7 @@ ENTRY(lguest_entry) | |||
24 | int $LGUEST_TRAP_ENTRY | 31 | int $LGUEST_TRAP_ENTRY |
25 | 32 | ||
26 | /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl | 33 | /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl |
27 | * instruction uses %esi implicitly as the source for the copy we' | 34 | * instruction uses %esi implicitly as the source for the copy we're |
28 | * about to do. */ | 35 | * about to do. */ |
29 | movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi | 36 | movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi |
30 | 37 | ||
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c index 031269163bd6..247f33d3a407 100644 --- a/arch/x86/mach-rdc321x/gpio.c +++ b/arch/x86/mach-rdc321x/gpio.c | |||
@@ -1,91 +1,194 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007, OpenWrt.org, Florian Fainelli <florian@openwrt.org> | 2 | * GPIO support for RDC SoC R3210/R8610 |
3 | * RDC321x architecture specific GPIO support | 3 | * |
4 | * Copyright (C) 2007, Florian Fainelli <florian@openwrt.org> | ||
5 | * Copyright (C) 2008, Volker Weiss <dev@tintuc.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
4 | * | 20 | * |
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License as published by the | ||
7 | * Free Software Foundation; either version 2 of the License, or (at your | ||
8 | * option) any later version. | ||
9 | */ | 21 | */ |
10 | 22 | ||
11 | #include <linux/autoconf.h> | 23 | |
12 | #include <linux/init.h> | 24 | #include <linux/spinlock.h> |
13 | #include <linux/io.h> | 25 | #include <linux/io.h> |
14 | #include <linux/types.h> | 26 | #include <linux/types.h> |
15 | #include <linux/module.h> | 27 | #include <linux/module.h> |
16 | #include <linux/delay.h> | ||
17 | 28 | ||
29 | #include <asm/gpio.h> | ||
18 | #include <asm/mach-rdc321x/rdc321x_defs.h> | 30 | #include <asm/mach-rdc321x/rdc321x_defs.h> |
19 | 31 | ||
20 | static inline int rdc_gpio_is_valid(unsigned gpio) | 32 | |
33 | /* spin lock to protect our private copy of GPIO data register plus | ||
34 | the access to PCI conf registers. */ | ||
35 | static DEFINE_SPINLOCK(gpio_lock); | ||
36 | |||
37 | /* copy of GPIO data registers */ | ||
38 | static u32 gpio_data_reg1; | ||
39 | static u32 gpio_data_reg2; | ||
40 | |||
41 | static u32 gpio_request_data[2]; | ||
42 | |||
43 | |||
44 | static inline void rdc321x_conf_write(unsigned addr, u32 value) | ||
21 | { | 45 | { |
22 | return (gpio <= RDC_MAX_GPIO); | 46 | outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); |
47 | outl(value, RDC3210_CFGREG_DATA); | ||
23 | } | 48 | } |
24 | 49 | ||
25 | static unsigned int rdc_gpio_read(unsigned gpio) | 50 | static inline void rdc321x_conf_or(unsigned addr, u32 value) |
26 | { | 51 | { |
27 | unsigned int val; | 52 | outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); |
28 | 53 | value |= inl(RDC3210_CFGREG_DATA); | |
29 | val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x84:0x48)); | 54 | outl(value, RDC3210_CFGREG_DATA); |
30 | outl(val, RDC3210_CFGREG_ADDR); | ||
31 | udelay(10); | ||
32 | val = inl(RDC3210_CFGREG_DATA); | ||
33 | val |= (0x1 << (gpio & 0x1F)); | ||
34 | outl(val, RDC3210_CFGREG_DATA); | ||
35 | udelay(10); | ||
36 | val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x88:0x4C)); | ||
37 | outl(val, RDC3210_CFGREG_ADDR); | ||
38 | udelay(10); | ||
39 | val = inl(RDC3210_CFGREG_DATA); | ||
40 | |||
41 | return val; | ||
42 | } | 55 | } |
43 | 56 | ||
44 | static void rdc_gpio_write(unsigned int val) | 57 | static inline u32 rdc321x_conf_read(unsigned addr) |
45 | { | 58 | { |
46 | if (val) { | 59 | outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); |
47 | outl(val, RDC3210_CFGREG_DATA); | 60 | |
48 | udelay(10); | 61 | return inl(RDC3210_CFGREG_DATA); |
49 | } | ||
50 | } | 62 | } |
51 | 63 | ||
52 | int rdc_gpio_get_value(unsigned gpio) | 64 | /* configure pin as GPIO */ |
65 | static void rdc321x_configure_gpio(unsigned gpio) | ||
66 | { | ||
67 | unsigned long flags; | ||
68 | |||
69 | spin_lock_irqsave(&gpio_lock, flags); | ||
70 | rdc321x_conf_or(gpio < 32 | ||
71 | ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2, | ||
72 | 1 << (gpio & 0x1f)); | ||
73 | spin_unlock_irqrestore(&gpio_lock, flags); | ||
74 | } | ||
75 | |||
76 | /* initially setup the 2 copies of the gpio data registers. | ||
77 | This function must be called by the platform setup code. */ | ||
78 | void __init rdc321x_gpio_setup() | ||
79 | { | ||
80 | /* this might not be, what others (BIOS, bootloader, etc.) | ||
81 | wrote to these registers before, but it's a good guess. Still | ||
82 | better than just using 0xffffffff. */ | ||
83 | |||
84 | gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1); | ||
85 | gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2); | ||
86 | } | ||
87 | |||
88 | /* determine, if gpio number is valid */ | ||
89 | static inline int rdc321x_is_gpio(unsigned gpio) | ||
90 | { | ||
91 | return gpio <= RDC321X_MAX_GPIO; | ||
92 | } | ||
93 | |||
94 | /* request GPIO */ | ||
95 | int rdc_gpio_request(unsigned gpio, const char *label) | ||
53 | { | 96 | { |
54 | if (rdc_gpio_is_valid(gpio)) | 97 | unsigned long flags; |
55 | return (int)rdc_gpio_read(gpio); | 98 | |
56 | else | 99 | if (!rdc321x_is_gpio(gpio)) |
57 | return -EINVAL; | 100 | return -EINVAL; |
101 | |||
102 | spin_lock_irqsave(&gpio_lock, flags); | ||
103 | if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f))) | ||
104 | goto inuse; | ||
105 | gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f)); | ||
106 | spin_unlock_irqrestore(&gpio_lock, flags); | ||
107 | |||
108 | return 0; | ||
109 | inuse: | ||
110 | spin_unlock_irqrestore(&gpio_lock, flags); | ||
111 | return -EINVAL; | ||
58 | } | 112 | } |
59 | EXPORT_SYMBOL(rdc_gpio_get_value); | 113 | EXPORT_SYMBOL(rdc_gpio_request); |
60 | 114 | ||
61 | void rdc_gpio_set_value(unsigned gpio, int value) | 115 | /* release previously-claimed GPIO */ |
116 | void rdc_gpio_free(unsigned gpio) | ||
62 | { | 117 | { |
63 | unsigned int val; | 118 | unsigned long flags; |
64 | 119 | ||
65 | if (!rdc_gpio_is_valid(gpio)) | 120 | if (!rdc321x_is_gpio(gpio)) |
66 | return; | 121 | return; |
67 | 122 | ||
68 | val = rdc_gpio_read(gpio); | 123 | spin_lock_irqsave(&gpio_lock, flags); |
124 | gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f)); | ||
125 | spin_unlock_irqrestore(&gpio_lock, flags); | ||
126 | } | ||
127 | EXPORT_SYMBOL(rdc_gpio_free); | ||
128 | |||
129 | /* read GPIO pin */ | ||
130 | int rdc_gpio_get_value(unsigned gpio) | ||
131 | { | ||
132 | u32 reg; | ||
133 | unsigned long flags; | ||
134 | |||
135 | spin_lock_irqsave(&gpio_lock, flags); | ||
136 | reg = rdc321x_conf_read(gpio < 32 | ||
137 | ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2); | ||
138 | spin_unlock_irqrestore(&gpio_lock, flags); | ||
69 | 139 | ||
70 | if (value) | 140 | return (1 << (gpio & 0x1f)) & reg ? 1 : 0; |
71 | val &= ~(0x1 << (gpio & 0x1F)); | 141 | } |
72 | else | 142 | EXPORT_SYMBOL(rdc_gpio_get_value); |
73 | val |= (0x1 << (gpio & 0x1F)); | ||
74 | 143 | ||
75 | rdc_gpio_write(val); | 144 | /* set GPIO pin to value */ |
145 | void rdc_gpio_set_value(unsigned gpio, int value) | ||
146 | { | ||
147 | unsigned long flags; | ||
148 | u32 reg; | ||
149 | |||
150 | reg = 1 << (gpio & 0x1f); | ||
151 | if (gpio < 32) { | ||
152 | spin_lock_irqsave(&gpio_lock, flags); | ||
153 | if (value) | ||
154 | gpio_data_reg1 |= reg; | ||
155 | else | ||
156 | gpio_data_reg1 &= ~reg; | ||
157 | rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1); | ||
158 | spin_unlock_irqrestore(&gpio_lock, flags); | ||
159 | } else { | ||
160 | spin_lock_irqsave(&gpio_lock, flags); | ||
161 | if (value) | ||
162 | gpio_data_reg2 |= reg; | ||
163 | else | ||
164 | gpio_data_reg2 &= ~reg; | ||
165 | rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2); | ||
166 | spin_unlock_irqrestore(&gpio_lock, flags); | ||
167 | } | ||
76 | } | 168 | } |
77 | EXPORT_SYMBOL(rdc_gpio_set_value); | 169 | EXPORT_SYMBOL(rdc_gpio_set_value); |
78 | 170 | ||
171 | /* configure GPIO pin as input */ | ||
79 | int rdc_gpio_direction_input(unsigned gpio) | 172 | int rdc_gpio_direction_input(unsigned gpio) |
80 | { | 173 | { |
174 | if (!rdc321x_is_gpio(gpio)) | ||
175 | return -EINVAL; | ||
176 | |||
177 | rdc321x_configure_gpio(gpio); | ||
178 | |||
81 | return 0; | 179 | return 0; |
82 | } | 180 | } |
83 | EXPORT_SYMBOL(rdc_gpio_direction_input); | 181 | EXPORT_SYMBOL(rdc_gpio_direction_input); |
84 | 182 | ||
183 | /* configure GPIO pin as output and set value */ | ||
85 | int rdc_gpio_direction_output(unsigned gpio, int value) | 184 | int rdc_gpio_direction_output(unsigned gpio, int value) |
86 | { | 185 | { |
186 | if (!rdc321x_is_gpio(gpio)) | ||
187 | return -EINVAL; | ||
188 | |||
189 | gpio_set_value(gpio, value); | ||
190 | rdc321x_configure_gpio(gpio); | ||
191 | |||
87 | return 0; | 192 | return 0; |
88 | } | 193 | } |
89 | EXPORT_SYMBOL(rdc_gpio_direction_output); | 194 | EXPORT_SYMBOL(rdc_gpio_direction_output); |
90 | |||
91 | |||
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c index dda6024a5862..a037041817c7 100644 --- a/arch/x86/mach-rdc321x/platform.c +++ b/arch/x86/mach-rdc321x/platform.c | |||
@@ -62,6 +62,8 @@ static struct platform_device *rdc321x_devs[] = { | |||
62 | 62 | ||
63 | static int __init rdc_board_setup(void) | 63 | static int __init rdc_board_setup(void) |
64 | { | 64 | { |
65 | rdc321x_gpio_setup(); | ||
66 | |||
65 | return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs)); | 67 | return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs)); |
66 | } | 68 | } |
67 | 69 | ||
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index c394ca0720b8..8e25e06ff730 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -324,7 +324,6 @@ unsigned long __init setup_memory(void) | |||
324 | * this space and use it to adjust the boundary between ZONE_NORMAL | 324 | * this space and use it to adjust the boundary between ZONE_NORMAL |
325 | * and ZONE_HIGHMEM. | 325 | * and ZONE_HIGHMEM. |
326 | */ | 326 | */ |
327 | find_max_pfn(); | ||
328 | get_memcfg_numa(); | 327 | get_memcfg_numa(); |
329 | 328 | ||
330 | kva_pages = calculate_numa_remap_pages(); | 329 | kva_pages = calculate_numa_remap_pages(); |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fdc667422df9..ec08d8389850 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -91,12 +91,10 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr, | |||
91 | int prefetch = 0; | 91 | int prefetch = 0; |
92 | unsigned char *max_instr; | 92 | unsigned char *max_instr; |
93 | 93 | ||
94 | #ifdef CONFIG_X86_32 | 94 | /* |
95 | if (!(__supported_pte_mask & _PAGE_NX)) | 95 | * If it was a exec (instruction fetch) fault on NX page, then |
96 | return 0; | 96 | * do not ignore the fault: |
97 | #endif | 97 | */ |
98 | |||
99 | /* If it was a exec fault on NX page, ignore */ | ||
100 | if (error_code & PF_INSTR) | 98 | if (error_code & PF_INSTR) |
101 | return 0; | 99 | return 0; |
102 | 100 | ||
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 3d936f232704..9cf33d3ee5bc 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -73,15 +73,15 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | |||
73 | { | 73 | { |
74 | enum fixed_addresses idx; | 74 | enum fixed_addresses idx; |
75 | unsigned long vaddr; | 75 | unsigned long vaddr; |
76 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | ||
77 | |||
78 | debug_kmap_atomic_prot(type); | ||
79 | 76 | ||
77 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | ||
80 | pagefault_disable(); | 78 | pagefault_disable(); |
81 | 79 | ||
82 | if (!PageHighMem(page)) | 80 | if (!PageHighMem(page)) |
83 | return page_address(page); | 81 | return page_address(page); |
84 | 82 | ||
83 | debug_kmap_atomic_prot(type); | ||
84 | |||
85 | idx = type + KM_TYPE_NR*smp_processor_id(); | 85 | idx = type + KM_TYPE_NR*smp_processor_id(); |
86 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | 86 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
87 | BUG_ON(!pte_none(*(kmap_pte-idx))); | 87 | BUG_ON(!pte_none(*(kmap_pte-idx))); |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 4fbafb4bc2f0..0b3d567e686d 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -178,7 +178,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | |||
178 | 178 | ||
179 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | 179 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; |
180 | 180 | ||
181 | WARN_ON(!PageCompound(page)); | 181 | WARN_ON(!PageHead(page)); |
182 | 182 | ||
183 | return page; | 183 | return page; |
184 | } | 184 | } |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4afaba0ed722..794895c6dcc9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -137,7 +137,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, | |||
137 | switch (mode) { | 137 | switch (mode) { |
138 | case IOR_MODE_UNCACHED: | 138 | case IOR_MODE_UNCACHED: |
139 | default: | 139 | default: |
140 | prot = PAGE_KERNEL_NOCACHE; | 140 | /* |
141 | * FIXME: we will use UC MINUS for now, as video fb drivers | ||
142 | * depend on it. Upcoming ioremap_wc() will fix this behavior. | ||
143 | */ | ||
144 | prot = PAGE_KERNEL_UC_MINUS; | ||
141 | break; | 145 | break; |
142 | case IOR_MODE_CACHED: | 146 | case IOR_MODE_CACHED: |
143 | prot = PAGE_KERNEL; | 147 | prot = PAGE_KERNEL; |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 14e48b5a94ba..7b79f6be4e7d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -771,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages, | |||
771 | int set_memory_uc(unsigned long addr, int numpages) | 771 | int set_memory_uc(unsigned long addr, int numpages) |
772 | { | 772 | { |
773 | return change_page_attr_set(addr, numpages, | 773 | return change_page_attr_set(addr, numpages, |
774 | __pgprot(_PAGE_PCD | _PAGE_PWT)); | 774 | __pgprot(_PAGE_PCD)); |
775 | } | 775 | } |
776 | EXPORT_SYMBOL(set_memory_uc); | 776 | EXPORT_SYMBOL(set_memory_uc); |
777 | 777 | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 8b9ee27805fd..27ee26aedf94 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -95,7 +95,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | |||
95 | * | 95 | * |
96 | * 0: not available, 1: available | 96 | * 0: not available, 1: available |
97 | */ | 97 | */ |
98 | static int have_vcpu_info_placement = 0; | 98 | static int have_vcpu_info_placement = 1; |
99 | 99 | ||
100 | static void __init xen_vcpu_setup(int cpu) | 100 | static void __init xen_vcpu_setup(int cpu) |
101 | { | 101 | { |
@@ -103,6 +103,7 @@ static void __init xen_vcpu_setup(int cpu) | |||
103 | int err; | 103 | int err; |
104 | struct vcpu_info *vcpup; | 104 | struct vcpu_info *vcpup; |
105 | 105 | ||
106 | BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); | ||
106 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 107 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
107 | 108 | ||
108 | if (!have_vcpu_info_placement) | 109 | if (!have_vcpu_info_placement) |
@@ -666,10 +667,10 @@ static void xen_release_pt_init(u32 pfn) | |||
666 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 667 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
667 | } | 668 | } |
668 | 669 | ||
669 | static void pin_pagetable_pfn(unsigned level, unsigned long pfn) | 670 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
670 | { | 671 | { |
671 | struct mmuext_op op; | 672 | struct mmuext_op op; |
672 | op.cmd = level; | 673 | op.cmd = cmd; |
673 | op.arg1.mfn = pfn_to_mfn(pfn); | 674 | op.arg1.mfn = pfn_to_mfn(pfn); |
674 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | 675 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) |
675 | BUG(); | 676 | BUG(); |
@@ -686,7 +687,8 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) | |||
686 | 687 | ||
687 | if (!PageHighMem(page)) { | 688 | if (!PageHighMem(page)) { |
688 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 689 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); |
689 | pin_pagetable_pfn(level, pfn); | 690 | if (level == PT_PTE) |
691 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); | ||
690 | } else | 692 | } else |
691 | /* make sure there are no stray mappings of | 693 | /* make sure there are no stray mappings of |
692 | this page */ | 694 | this page */ |
@@ -696,27 +698,39 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) | |||
696 | 698 | ||
697 | static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) | 699 | static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) |
698 | { | 700 | { |
699 | xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE); | 701 | xen_alloc_ptpage(mm, pfn, PT_PTE); |
700 | } | 702 | } |
701 | 703 | ||
702 | static void xen_alloc_pd(struct mm_struct *mm, u32 pfn) | 704 | static void xen_alloc_pd(struct mm_struct *mm, u32 pfn) |
703 | { | 705 | { |
704 | xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE); | 706 | xen_alloc_ptpage(mm, pfn, PT_PMD); |
705 | } | 707 | } |
706 | 708 | ||
707 | /* This should never happen until we're OK to use struct page */ | 709 | /* This should never happen until we're OK to use struct page */ |
708 | static void xen_release_pt(u32 pfn) | 710 | static void xen_release_ptpage(u32 pfn, unsigned level) |
709 | { | 711 | { |
710 | struct page *page = pfn_to_page(pfn); | 712 | struct page *page = pfn_to_page(pfn); |
711 | 713 | ||
712 | if (PagePinned(page)) { | 714 | if (PagePinned(page)) { |
713 | if (!PageHighMem(page)) { | 715 | if (!PageHighMem(page)) { |
714 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 716 | if (level == PT_PTE) |
717 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | ||
715 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 718 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
716 | } | 719 | } |
720 | ClearPagePinned(page); | ||
717 | } | 721 | } |
718 | } | 722 | } |
719 | 723 | ||
724 | static void xen_release_pt(u32 pfn) | ||
725 | { | ||
726 | xen_release_ptpage(pfn, PT_PTE); | ||
727 | } | ||
728 | |||
729 | static void xen_release_pd(u32 pfn) | ||
730 | { | ||
731 | xen_release_ptpage(pfn, PT_PMD); | ||
732 | } | ||
733 | |||
720 | #ifdef CONFIG_HIGHPTE | 734 | #ifdef CONFIG_HIGHPTE |
721 | static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) | 735 | static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) |
722 | { | 736 | { |
@@ -805,33 +819,43 @@ static __init void xen_pagetable_setup_start(pgd_t *base) | |||
805 | PFN_DOWN(__pa(xen_start_info->pt_base))); | 819 | PFN_DOWN(__pa(xen_start_info->pt_base))); |
806 | } | 820 | } |
807 | 821 | ||
808 | static __init void xen_pagetable_setup_done(pgd_t *base) | 822 | static __init void setup_shared_info(void) |
809 | { | 823 | { |
810 | /* This will work as long as patching hasn't happened yet | ||
811 | (which it hasn't) */ | ||
812 | pv_mmu_ops.alloc_pt = xen_alloc_pt; | ||
813 | pv_mmu_ops.alloc_pd = xen_alloc_pd; | ||
814 | pv_mmu_ops.release_pt = xen_release_pt; | ||
815 | pv_mmu_ops.release_pd = xen_release_pt; | ||
816 | pv_mmu_ops.set_pte = xen_set_pte; | ||
817 | |||
818 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 824 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
825 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); | ||
826 | |||
819 | /* | 827 | /* |
820 | * Create a mapping for the shared info page. | 828 | * Create a mapping for the shared info page. |
821 | * Should be set_fixmap(), but shared_info is a machine | 829 | * Should be set_fixmap(), but shared_info is a machine |
822 | * address with no corresponding pseudo-phys address. | 830 | * address with no corresponding pseudo-phys address. |
823 | */ | 831 | */ |
824 | set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP), | 832 | set_pte_mfn(addr, |
825 | PFN_DOWN(xen_start_info->shared_info), | 833 | PFN_DOWN(xen_start_info->shared_info), |
826 | PAGE_KERNEL); | 834 | PAGE_KERNEL); |
827 | 835 | ||
828 | HYPERVISOR_shared_info = | 836 | HYPERVISOR_shared_info = (struct shared_info *)addr; |
829 | (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); | ||
830 | |||
831 | } else | 837 | } else |
832 | HYPERVISOR_shared_info = | 838 | HYPERVISOR_shared_info = |
833 | (struct shared_info *)__va(xen_start_info->shared_info); | 839 | (struct shared_info *)__va(xen_start_info->shared_info); |
834 | 840 | ||
841 | #ifndef CONFIG_SMP | ||
842 | /* In UP this is as good a place as any to set up shared info */ | ||
843 | xen_setup_vcpu_info_placement(); | ||
844 | #endif | ||
845 | } | ||
846 | |||
847 | static __init void xen_pagetable_setup_done(pgd_t *base) | ||
848 | { | ||
849 | /* This will work as long as patching hasn't happened yet | ||
850 | (which it hasn't) */ | ||
851 | pv_mmu_ops.alloc_pt = xen_alloc_pt; | ||
852 | pv_mmu_ops.alloc_pd = xen_alloc_pd; | ||
853 | pv_mmu_ops.release_pt = xen_release_pt; | ||
854 | pv_mmu_ops.release_pd = xen_release_pd; | ||
855 | pv_mmu_ops.set_pte = xen_set_pte; | ||
856 | |||
857 | setup_shared_info(); | ||
858 | |||
835 | /* Actually pin the pagetable down, but we can't set PG_pinned | 859 | /* Actually pin the pagetable down, but we can't set PG_pinned |
836 | yet because the page structures don't exist yet. */ | 860 | yet because the page structures don't exist yet. */ |
837 | { | 861 | { |
@@ -1182,15 +1206,9 @@ asmlinkage void __init xen_start_kernel(void) | |||
1182 | x86_write_percpu(xen_cr3, __pa(pgd)); | 1206 | x86_write_percpu(xen_cr3, __pa(pgd)); |
1183 | x86_write_percpu(xen_current_cr3, __pa(pgd)); | 1207 | x86_write_percpu(xen_current_cr3, __pa(pgd)); |
1184 | 1208 | ||
1185 | #ifdef CONFIG_SMP | ||
1186 | /* Don't do the full vcpu_info placement stuff until we have a | 1209 | /* Don't do the full vcpu_info placement stuff until we have a |
1187 | possible map. */ | 1210 | possible map and a non-dummy shared_info. */ |
1188 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | 1211 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; |
1189 | #else | ||
1190 | /* May as well do it now, since there's no good time to call | ||
1191 | it later on UP. */ | ||
1192 | xen_setup_vcpu_info_placement(); | ||
1193 | #endif | ||
1194 | 1212 | ||
1195 | pv_info.kernel_rpl = 1; | 1213 | pv_info.kernel_rpl = 1; |
1196 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1214 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0144395448ae..2a054ef2a3da 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -310,13 +310,6 @@ pgd_t xen_make_pgd(unsigned long pgd) | |||
310 | } | 310 | } |
311 | #endif /* CONFIG_X86_PAE */ | 311 | #endif /* CONFIG_X86_PAE */ |
312 | 312 | ||
313 | enum pt_level { | ||
314 | PT_PGD, | ||
315 | PT_PUD, | ||
316 | PT_PMD, | ||
317 | PT_PTE | ||
318 | }; | ||
319 | |||
320 | /* | 313 | /* |
321 | (Yet another) pagetable walker. This one is intended for pinning a | 314 | (Yet another) pagetable walker. This one is intended for pinning a |
322 | pagetable. This means that it walks a pagetable and calls the | 315 | pagetable. This means that it walks a pagetable and calls the |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index c9ff27f3ac3a..b5e189b1519d 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -3,6 +3,13 @@ | |||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/page.h> | 4 | #include <asm/page.h> |
5 | 5 | ||
6 | enum pt_level { | ||
7 | PT_PGD, | ||
8 | PT_PUD, | ||
9 | PT_PMD, | ||
10 | PT_PTE | ||
11 | }; | ||
12 | |||
6 | /* | 13 | /* |
7 | * Page-directory addresses above 4GB do not fit into architectural %cr3. | 14 | * Page-directory addresses above 4GB do not fit into architectural %cr3. |
8 | * When accessing %cr3, or equivalent field in vcpu_guest_context, guests | 15 | * When accessing %cr3, or equivalent field in vcpu_guest_context, guests |
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 1a43b60c0c62..6b7190449d07 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S | |||
@@ -33,12 +33,17 @@ | |||
33 | events, then enter the hypervisor to get them handled. | 33 | events, then enter the hypervisor to get them handled. |
34 | */ | 34 | */ |
35 | ENTRY(xen_irq_enable_direct) | 35 | ENTRY(xen_irq_enable_direct) |
36 | /* Clear mask and test pending */ | 36 | /* Unmask events */ |
37 | andw $0x00ff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending | 37 | movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask |
38 | |||
38 | /* Preempt here doesn't matter because that will deal with | 39 | /* Preempt here doesn't matter because that will deal with |
39 | any pending interrupts. The pending check may end up being | 40 | any pending interrupts. The pending check may end up being |
40 | run on the wrong CPU, but that doesn't hurt. */ | 41 | run on the wrong CPU, but that doesn't hurt. */ |
42 | |||
43 | /* Test for pending */ | ||
44 | testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending | ||
41 | jz 1f | 45 | jz 1f |
46 | |||
42 | 2: call check_events | 47 | 2: call check_events |
43 | 1: | 48 | 1: |
44 | ENDPATCH(xen_irq_enable_direct) | 49 | ENDPATCH(xen_irq_enable_direct) |