diff options
Diffstat (limited to 'arch/i386/kernel')
40 files changed, 2029 insertions, 285 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 1e8988e558c5..cbe4e601885c 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -40,8 +40,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | |||
40 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 40 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
41 | obj-$(CONFIG_K8_NB) += k8.o | 41 | obj-$(CONFIG_K8_NB) += k8.o |
42 | 42 | ||
43 | # Make sure this is linked after any other paravirt_ops structs: see head.S | 43 | obj-$(CONFIG_VMI) += vmi.o vmitime.o |
44 | obj-$(CONFIG_PARAVIRT) += paravirt.o | 44 | obj-$(CONFIG_PARAVIRT) += paravirt.o |
45 | obj-y += pcspeaker.o | ||
45 | 46 | ||
46 | EXTRA_AFLAGS := -traditional | 47 | EXTRA_AFLAGS := -traditional |
47 | 48 | ||
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 776d9be26af9..f4159e0a7ae9 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/hpet.h> | 36 | #include <asm/hpet.h> |
37 | #include <asm/i8253.h> | 37 | #include <asm/i8253.h> |
38 | #include <asm/nmi.h> | 38 | #include <asm/nmi.h> |
39 | #include <asm/idle.h> | ||
39 | 40 | ||
40 | #include <mach_apic.h> | 41 | #include <mach_apic.h> |
41 | #include <mach_apicdef.h> | 42 | #include <mach_apicdef.h> |
@@ -1255,6 +1256,7 @@ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
1255 | * Besides, if we don't timer interrupts ignore the global | 1256 | * Besides, if we don't timer interrupts ignore the global |
1256 | * interrupt lock, which is the WrongThing (tm) to do. | 1257 | * interrupt lock, which is the WrongThing (tm) to do. |
1257 | */ | 1258 | */ |
1259 | exit_idle(); | ||
1258 | irq_enter(); | 1260 | irq_enter(); |
1259 | smp_local_timer_interrupt(); | 1261 | smp_local_timer_interrupt(); |
1260 | irq_exit(); | 1262 | irq_exit(); |
@@ -1305,6 +1307,7 @@ fastcall void smp_spurious_interrupt(struct pt_regs *regs) | |||
1305 | { | 1307 | { |
1306 | unsigned long v; | 1308 | unsigned long v; |
1307 | 1309 | ||
1310 | exit_idle(); | ||
1308 | irq_enter(); | 1311 | irq_enter(); |
1309 | /* | 1312 | /* |
1310 | * Check if this really is a spurious interrupt and ACK it | 1313 | * Check if this really is a spurious interrupt and ACK it |
@@ -1329,6 +1332,7 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) | |||
1329 | { | 1332 | { |
1330 | unsigned long v, v1; | 1333 | unsigned long v, v1; |
1331 | 1334 | ||
1335 | exit_idle(); | ||
1332 | irq_enter(); | 1336 | irq_enter(); |
1333 | /* First tickle the hardware, only then report what went on. -- REW */ | 1337 | /* First tickle the hardware, only then report what went on. -- REW */ |
1334 | v = apic_read(APIC_ESR); | 1338 | v = apic_read(APIC_ESR); |
@@ -1395,7 +1399,7 @@ int __init APIC_init_uniprocessor (void) | |||
1395 | if (!skip_ioapic_setup && nr_ioapics) | 1399 | if (!skip_ioapic_setup && nr_ioapics) |
1396 | setup_IO_APIC(); | 1400 | setup_IO_APIC(); |
1397 | #endif | 1401 | #endif |
1398 | setup_boot_APIC_clock(); | 1402 | setup_boot_clock(); |
1399 | 1403 | ||
1400 | return 0; | 1404 | return 0; |
1401 | } | 1405 | } |
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index db99a8948dae..f9ba0af7ee1f 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -211,6 +211,7 @@ | |||
211 | #include <linux/slab.h> | 211 | #include <linux/slab.h> |
212 | #include <linux/stat.h> | 212 | #include <linux/stat.h> |
213 | #include <linux/proc_fs.h> | 213 | #include <linux/proc_fs.h> |
214 | #include <linux/seq_file.h> | ||
214 | #include <linux/miscdevice.h> | 215 | #include <linux/miscdevice.h> |
215 | #include <linux/apm_bios.h> | 216 | #include <linux/apm_bios.h> |
216 | #include <linux/init.h> | 217 | #include <linux/init.h> |
@@ -1636,9 +1637,8 @@ static int do_open(struct inode * inode, struct file * filp) | |||
1636 | return 0; | 1637 | return 0; |
1637 | } | 1638 | } |
1638 | 1639 | ||
1639 | static int apm_get_info(char *buf, char **start, off_t fpos, int length) | 1640 | static int proc_apm_show(struct seq_file *m, void *v) |
1640 | { | 1641 | { |
1641 | char * p; | ||
1642 | unsigned short bx; | 1642 | unsigned short bx; |
1643 | unsigned short cx; | 1643 | unsigned short cx; |
1644 | unsigned short dx; | 1644 | unsigned short dx; |
@@ -1650,8 +1650,6 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1650 | int time_units = -1; | 1650 | int time_units = -1; |
1651 | char *units = "?"; | 1651 | char *units = "?"; |
1652 | 1652 | ||
1653 | p = buf; | ||
1654 | |||
1655 | if ((num_online_cpus() == 1) && | 1653 | if ((num_online_cpus() == 1) && |
1656 | !(error = apm_get_power_status(&bx, &cx, &dx))) { | 1654 | !(error = apm_get_power_status(&bx, &cx, &dx))) { |
1657 | ac_line_status = (bx >> 8) & 0xff; | 1655 | ac_line_status = (bx >> 8) & 0xff; |
@@ -1705,7 +1703,7 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1705 | -1: Unknown | 1703 | -1: Unknown |
1706 | 8) min = minutes; sec = seconds */ | 1704 | 8) min = minutes; sec = seconds */ |
1707 | 1705 | ||
1708 | p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", | 1706 | seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", |
1709 | driver_version, | 1707 | driver_version, |
1710 | (apm_info.bios.version >> 8) & 0xff, | 1708 | (apm_info.bios.version >> 8) & 0xff, |
1711 | apm_info.bios.version & 0xff, | 1709 | apm_info.bios.version & 0xff, |
@@ -1716,10 +1714,22 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1716 | percentage, | 1714 | percentage, |
1717 | time_units, | 1715 | time_units, |
1718 | units); | 1716 | units); |
1717 | return 0; | ||
1718 | } | ||
1719 | 1719 | ||
1720 | return p - buf; | 1720 | static int proc_apm_open(struct inode *inode, struct file *file) |
1721 | { | ||
1722 | return single_open(file, proc_apm_show, NULL); | ||
1721 | } | 1723 | } |
1722 | 1724 | ||
1725 | static const struct file_operations apm_file_ops = { | ||
1726 | .owner = THIS_MODULE, | ||
1727 | .open = proc_apm_open, | ||
1728 | .read = seq_read, | ||
1729 | .llseek = seq_lseek, | ||
1730 | .release = single_release, | ||
1731 | }; | ||
1732 | |||
1723 | static int apm(void *unused) | 1733 | static int apm(void *unused) |
1724 | { | 1734 | { |
1725 | unsigned short bx; | 1735 | unsigned short bx; |
@@ -2341,9 +2351,9 @@ static int __init apm_init(void) | |||
2341 | set_base(gdt[APM_DS >> 3], | 2351 | set_base(gdt[APM_DS >> 3], |
2342 | __va((unsigned long)apm_info.bios.dseg << 4)); | 2352 | __va((unsigned long)apm_info.bios.dseg << 4)); |
2343 | 2353 | ||
2344 | apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); | 2354 | apm_proc = create_proc_entry("apm", 0, NULL); |
2345 | if (apm_proc) | 2355 | if (apm_proc) |
2346 | apm_proc->owner = THIS_MODULE; | 2356 | apm_proc->proc_fops = &apm_file_ops; |
2347 | 2357 | ||
2348 | kapmd_task = kthread_create(apm, NULL, "kapmd"); | 2358 | kapmd_task = kthread_create(apm, NULL, "kapmd"); |
2349 | if (IS_ERR(kapmd_task)) { | 2359 | if (IS_ERR(kapmd_task)) { |
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 1b2f3cd33270..c37535163bfc 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -72,7 +72,7 @@ void foo(void) | |||
72 | OFFSET(PT_EAX, pt_regs, eax); | 72 | OFFSET(PT_EAX, pt_regs, eax); |
73 | OFFSET(PT_DS, pt_regs, xds); | 73 | OFFSET(PT_DS, pt_regs, xds); |
74 | OFFSET(PT_ES, pt_regs, xes); | 74 | OFFSET(PT_ES, pt_regs, xes); |
75 | OFFSET(PT_GS, pt_regs, xgs); | 75 | OFFSET(PT_FS, pt_regs, xfs); |
76 | OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); | 76 | OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); |
77 | OFFSET(PT_EIP, pt_regs, eip); | 77 | OFFSET(PT_EIP, pt_regs, eip); |
78 | OFFSET(PT_CS, pt_regs, xcs); | 78 | OFFSET(PT_CS, pt_regs, xcs); |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 8a8bbdaaf38a..dcbbd0a8bfc2 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -605,7 +605,7 @@ void __init early_cpu_init(void) | |||
605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | 605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
606 | { | 606 | { |
607 | memset(regs, 0, sizeof(struct pt_regs)); | 607 | memset(regs, 0, sizeof(struct pt_regs)); |
608 | regs->xgs = __KERNEL_PDA; | 608 | regs->xfs = __KERNEL_PDA; |
609 | return regs; | 609 | return regs; |
610 | } | 610 | } |
611 | 611 | ||
@@ -662,12 +662,12 @@ struct i386_pda boot_pda = { | |||
662 | .pcurrent = &init_task, | 662 | .pcurrent = &init_task, |
663 | }; | 663 | }; |
664 | 664 | ||
665 | static inline void set_kernel_gs(void) | 665 | static inline void set_kernel_fs(void) |
666 | { | 666 | { |
667 | /* Set %gs for this CPU's PDA. Memory clobber is to create a | 667 | /* Set %fs for this CPU's PDA. Memory clobber is to create a |
668 | barrier with respect to any PDA operations, so the compiler | 668 | barrier with respect to any PDA operations, so the compiler |
669 | doesn't move any before here. */ | 669 | doesn't move any before here. */ |
670 | asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); | 670 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); |
671 | } | 671 | } |
672 | 672 | ||
673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for | 673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for |
@@ -718,7 +718,7 @@ void __cpuinit cpu_set_gdt(int cpu) | |||
718 | the boot CPU, this will transition from the boot gdt+pda to | 718 | the boot CPU, this will transition from the boot gdt+pda to |
719 | the real ones). */ | 719 | the real ones). */ |
720 | load_gdt(cpu_gdt_descr); | 720 | load_gdt(cpu_gdt_descr); |
721 | set_kernel_gs(); | 721 | set_kernel_fs(); |
722 | } | 722 | } |
723 | 723 | ||
724 | /* Common CPU init for both boot and secondary CPUs */ | 724 | /* Common CPU init for both boot and secondary CPUs */ |
@@ -764,8 +764,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |||
764 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | 764 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
765 | #endif | 765 | #endif |
766 | 766 | ||
767 | /* Clear %fs. */ | 767 | /* Clear %gs. */ |
768 | asm volatile ("mov %0, %%fs" : : "r" (0)); | 768 | asm volatile ("mov %0, %%gs" : : "r" (0)); |
769 | 769 | ||
770 | /* Clear all 6 debug registers: */ | 770 | /* Clear all 6 debug registers: */ |
771 | set_debugreg(0, 0); | 771 | set_debugreg(0, 0); |
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index c0c3b59de32c..de27bd07bc9c 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/io.h> | 6 | #include <asm/io.h> |
7 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
8 | #include <asm/timer.h> | 8 | #include <asm/timer.h> |
9 | #include <asm/pci-direct.h> | ||
9 | 10 | ||
10 | #include "cpu.h" | 11 | #include "cpu.h" |
11 | 12 | ||
@@ -161,19 +162,19 @@ static void __cpuinit set_cx86_inc(void) | |||
161 | static void __cpuinit geode_configure(void) | 162 | static void __cpuinit geode_configure(void) |
162 | { | 163 | { |
163 | unsigned long flags; | 164 | unsigned long flags; |
164 | u8 ccr3, ccr4; | 165 | u8 ccr3; |
165 | local_irq_save(flags); | 166 | local_irq_save(flags); |
166 | 167 | ||
167 | /* Suspend on halt power saving and enable #SUSP pin */ | 168 | /* Suspend on halt power saving and enable #SUSP pin */ |
168 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); | 169 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); |
169 | 170 | ||
170 | ccr3 = getCx86(CX86_CCR3); | 171 | ccr3 = getCx86(CX86_CCR3); |
171 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */ | 172 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
172 | |||
173 | ccr4 = getCx86(CX86_CCR4); | ||
174 | ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */ | ||
175 | 173 | ||
176 | setCx86(CX86_CCR3, ccr3); | 174 | |
175 | /* FPU fast, DTE cache, Mem bypass */ | ||
176 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); | ||
177 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
177 | 178 | ||
178 | set_cx86_memwb(); | 179 | set_cx86_memwb(); |
179 | set_cx86_reorder(); | 180 | set_cx86_reorder(); |
@@ -183,14 +184,6 @@ static void __cpuinit geode_configure(void) | |||
183 | } | 184 | } |
184 | 185 | ||
185 | 186 | ||
186 | #ifdef CONFIG_PCI | ||
187 | static struct pci_device_id __cpuinitdata cyrix_55x0[] = { | ||
188 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, | ||
189 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, | ||
190 | { }, | ||
191 | }; | ||
192 | #endif | ||
193 | |||
194 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | 187 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) |
195 | { | 188 | { |
196 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; | 189 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; |
@@ -258,6 +251,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
258 | 251 | ||
259 | case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ | 252 | case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ |
260 | #ifdef CONFIG_PCI | 253 | #ifdef CONFIG_PCI |
254 | { | ||
255 | u32 vendor, device; | ||
261 | /* It isn't really a PCI quirk directly, but the cure is the | 256 | /* It isn't really a PCI quirk directly, but the cure is the |
262 | same. The MediaGX has deep magic SMM stuff that handles the | 257 | same. The MediaGX has deep magic SMM stuff that handles the |
263 | SB emulation. It thows away the fifo on disable_dma() which | 258 | SB emulation. It thows away the fifo on disable_dma() which |
@@ -273,22 +268,34 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
273 | printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); | 268 | printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); |
274 | isa_dma_bridge_buggy = 2; | 269 | isa_dma_bridge_buggy = 2; |
275 | 270 | ||
271 | /* We do this before the PCI layer is running. However we | ||
272 | are safe here as we know the bridge must be a Cyrix | ||
273 | companion and must be present */ | ||
274 | vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); | ||
275 | device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); | ||
276 | 276 | ||
277 | /* | 277 | /* |
278 | * The 5510/5520 companion chips have a funky PIT. | 278 | * The 5510/5520 companion chips have a funky PIT. |
279 | */ | 279 | */ |
280 | if (pci_dev_present(cyrix_55x0)) | 280 | if (vendor == PCI_VENDOR_ID_CYRIX && |
281 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) | ||
281 | pit_latch_buggy = 1; | 282 | pit_latch_buggy = 1; |
283 | } | ||
282 | #endif | 284 | #endif |
283 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ | 285 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ |
284 | 286 | ||
285 | /* GXm supports extended cpuid levels 'ala' AMD */ | 287 | /* GXm supports extended cpuid levels 'ala' AMD */ |
286 | if (c->cpuid_level == 2) { | 288 | if (c->cpuid_level == 2) { |
287 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ | 289 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ |
288 | setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); | 290 | setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); |
289 | 291 | ||
290 | /* GXlv/GXm/GX1 */ | 292 | /* |
291 | if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63) | 293 | * GXm : 0x30 ... 0x5f GXm datasheet 51 |
294 | * GXlv: 0x6x GXlv datasheet 54 | ||
295 | * ? : 0x7x | ||
296 | * GX1 : 0x8x GX1 datasheet 56 | ||
297 | */ | ||
298 | if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) | ||
292 | geode_configure(); | 299 | geode_configure(); |
293 | get_model_name(c); /* get CPU marketing name */ | 300 | get_model_name(c); /* get CPU marketing name */ |
294 | return; | 301 | return; |
@@ -415,15 +422,14 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) | |||
415 | 422 | ||
416 | if (dir0 == 5 || dir0 == 3) | 423 | if (dir0 == 5 || dir0 == 3) |
417 | { | 424 | { |
418 | unsigned char ccr3, ccr4; | 425 | unsigned char ccr3; |
419 | unsigned long flags; | 426 | unsigned long flags; |
420 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); | 427 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); |
421 | local_irq_save(flags); | 428 | local_irq_save(flags); |
422 | ccr3 = getCx86(CX86_CCR3); | 429 | ccr3 = getCx86(CX86_CCR3); |
423 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 430 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
424 | ccr4 = getCx86(CX86_CCR4); | 431 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ |
425 | setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ | 432 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ |
426 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
427 | local_irq_restore(flags); | 433 | local_irq_restore(flags); |
428 | } | 434 | } |
429 | } | 435 | } |
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index d555bec0db99..4f10c62d180c 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
14 | #include <asm/system.h> | 14 | #include <asm/system.h> |
15 | #include <asm/mce.h> | ||
15 | 16 | ||
16 | #include "mce.h" | 17 | #include "mce.h" |
17 | 18 | ||
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h index 84fd4cf7d0fb..81fb6e2d35f3 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.h +++ b/arch/i386/kernel/cpu/mcheck/mce.h | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <asm/mce.h> | ||
2 | 3 | ||
3 | void amd_mcheck_init(struct cpuinfo_x86 *c); | 4 | void amd_mcheck_init(struct cpuinfo_x86 *c); |
4 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | 5 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); |
@@ -9,6 +10,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c); | |||
9 | /* Call the installed machine check handler for this CPU setup. */ | 10 | /* Call the installed machine check handler for this CPU setup. */ |
10 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); | 11 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); |
11 | 12 | ||
12 | extern int mce_disabled; | ||
13 | extern int nr_mce_banks; | 13 | extern int nr_mce_banks; |
14 | 14 | ||
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 504434a46011..8359c19d3a23 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | #include <asm/idle.h> | ||
15 | 16 | ||
16 | #include <asm/therm_throt.h> | 17 | #include <asm/therm_throt.h> |
17 | 18 | ||
@@ -59,6 +60,7 @@ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_therm | |||
59 | 60 | ||
60 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) | 61 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) |
61 | { | 62 | { |
63 | exit_idle(); | ||
62 | irq_enter(); | 64 | irq_enter(); |
63 | vendor_thermal_interrupt(regs); | 65 | vendor_thermal_interrupt(regs); |
64 | irq_exit(); | 66 | irq_exit(); |
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index ee771f305f96..c7d8f1756745 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c | |||
@@ -211,6 +211,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
211 | default: | 211 | default: |
212 | return -ENOTTY; | 212 | return -ENOTTY; |
213 | case MTRRIOC_ADD_ENTRY: | 213 | case MTRRIOC_ADD_ENTRY: |
214 | #ifdef CONFIG_COMPAT | ||
215 | case MTRRIOC32_ADD_ENTRY: | ||
216 | #endif | ||
214 | if (!capable(CAP_SYS_ADMIN)) | 217 | if (!capable(CAP_SYS_ADMIN)) |
215 | return -EPERM; | 218 | return -EPERM; |
216 | err = | 219 | err = |
@@ -218,21 +221,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
218 | file, 0); | 221 | file, 0); |
219 | break; | 222 | break; |
220 | case MTRRIOC_SET_ENTRY: | 223 | case MTRRIOC_SET_ENTRY: |
224 | #ifdef CONFIG_COMPAT | ||
225 | case MTRRIOC32_SET_ENTRY: | ||
226 | #endif | ||
221 | if (!capable(CAP_SYS_ADMIN)) | 227 | if (!capable(CAP_SYS_ADMIN)) |
222 | return -EPERM; | 228 | return -EPERM; |
223 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); | 229 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); |
224 | break; | 230 | break; |
225 | case MTRRIOC_DEL_ENTRY: | 231 | case MTRRIOC_DEL_ENTRY: |
232 | #ifdef CONFIG_COMPAT | ||
233 | case MTRRIOC32_DEL_ENTRY: | ||
234 | #endif | ||
226 | if (!capable(CAP_SYS_ADMIN)) | 235 | if (!capable(CAP_SYS_ADMIN)) |
227 | return -EPERM; | 236 | return -EPERM; |
228 | err = mtrr_file_del(sentry.base, sentry.size, file, 0); | 237 | err = mtrr_file_del(sentry.base, sentry.size, file, 0); |
229 | break; | 238 | break; |
230 | case MTRRIOC_KILL_ENTRY: | 239 | case MTRRIOC_KILL_ENTRY: |
240 | #ifdef CONFIG_COMPAT | ||
241 | case MTRRIOC32_KILL_ENTRY: | ||
242 | #endif | ||
231 | if (!capable(CAP_SYS_ADMIN)) | 243 | if (!capable(CAP_SYS_ADMIN)) |
232 | return -EPERM; | 244 | return -EPERM; |
233 | err = mtrr_del(-1, sentry.base, sentry.size); | 245 | err = mtrr_del(-1, sentry.base, sentry.size); |
234 | break; | 246 | break; |
235 | case MTRRIOC_GET_ENTRY: | 247 | case MTRRIOC_GET_ENTRY: |
248 | #ifdef CONFIG_COMPAT | ||
249 | case MTRRIOC32_GET_ENTRY: | ||
250 | #endif | ||
236 | if (gentry.regnum >= num_var_ranges) | 251 | if (gentry.regnum >= num_var_ranges) |
237 | return -EINVAL; | 252 | return -EINVAL; |
238 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); | 253 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); |
@@ -249,6 +264,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
249 | 264 | ||
250 | break; | 265 | break; |
251 | case MTRRIOC_ADD_PAGE_ENTRY: | 266 | case MTRRIOC_ADD_PAGE_ENTRY: |
267 | #ifdef CONFIG_COMPAT | ||
268 | case MTRRIOC32_ADD_PAGE_ENTRY: | ||
269 | #endif | ||
252 | if (!capable(CAP_SYS_ADMIN)) | 270 | if (!capable(CAP_SYS_ADMIN)) |
253 | return -EPERM; | 271 | return -EPERM; |
254 | err = | 272 | err = |
@@ -256,21 +274,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
256 | file, 1); | 274 | file, 1); |
257 | break; | 275 | break; |
258 | case MTRRIOC_SET_PAGE_ENTRY: | 276 | case MTRRIOC_SET_PAGE_ENTRY: |
277 | #ifdef CONFIG_COMPAT | ||
278 | case MTRRIOC32_SET_PAGE_ENTRY: | ||
279 | #endif | ||
259 | if (!capable(CAP_SYS_ADMIN)) | 280 | if (!capable(CAP_SYS_ADMIN)) |
260 | return -EPERM; | 281 | return -EPERM; |
261 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); | 282 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); |
262 | break; | 283 | break; |
263 | case MTRRIOC_DEL_PAGE_ENTRY: | 284 | case MTRRIOC_DEL_PAGE_ENTRY: |
285 | #ifdef CONFIG_COMPAT | ||
286 | case MTRRIOC32_DEL_PAGE_ENTRY: | ||
287 | #endif | ||
264 | if (!capable(CAP_SYS_ADMIN)) | 288 | if (!capable(CAP_SYS_ADMIN)) |
265 | return -EPERM; | 289 | return -EPERM; |
266 | err = mtrr_file_del(sentry.base, sentry.size, file, 1); | 290 | err = mtrr_file_del(sentry.base, sentry.size, file, 1); |
267 | break; | 291 | break; |
268 | case MTRRIOC_KILL_PAGE_ENTRY: | 292 | case MTRRIOC_KILL_PAGE_ENTRY: |
293 | #ifdef CONFIG_COMPAT | ||
294 | case MTRRIOC32_KILL_PAGE_ENTRY: | ||
295 | #endif | ||
269 | if (!capable(CAP_SYS_ADMIN)) | 296 | if (!capable(CAP_SYS_ADMIN)) |
270 | return -EPERM; | 297 | return -EPERM; |
271 | err = mtrr_del_page(-1, sentry.base, sentry.size); | 298 | err = mtrr_del_page(-1, sentry.base, sentry.size); |
272 | break; | 299 | break; |
273 | case MTRRIOC_GET_PAGE_ENTRY: | 300 | case MTRRIOC_GET_PAGE_ENTRY: |
301 | #ifdef CONFIG_COMPAT | ||
302 | case MTRRIOC32_GET_PAGE_ENTRY: | ||
303 | #endif | ||
274 | if (gentry.regnum >= num_var_ranges) | 304 | if (gentry.regnum >= num_var_ranges) |
275 | return -EINVAL; | 305 | return -EINVAL; |
276 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); | 306 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); |
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 16bb7ea87145..0acfb6a5a220 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c | |||
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0; | |||
50 | unsigned int *usage_table; | 50 | unsigned int *usage_table; |
51 | static DEFINE_MUTEX(mtrr_mutex); | 51 | static DEFINE_MUTEX(mtrr_mutex); |
52 | 52 | ||
53 | u32 size_or_mask, size_and_mask; | 53 | u64 size_or_mask, size_and_mask; |
54 | 54 | ||
55 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; | 55 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; |
56 | 56 | ||
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void) | |||
662 | boot_cpu_data.x86_mask == 0x4)) | 662 | boot_cpu_data.x86_mask == 0x4)) |
663 | phys_addr = 36; | 663 | phys_addr = 36; |
664 | 664 | ||
665 | size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); | 665 | size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); |
666 | size_and_mask = ~size_or_mask & 0xfff00000; | 666 | size_and_mask = ~size_or_mask & 0xfffff00000ULL; |
667 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && | 667 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && |
668 | boot_cpu_data.x86 == 6) { | 668 | boot_cpu_data.x86 == 6) { |
669 | /* VIA C* family have Intel style MTRRs, but | 669 | /* VIA C* family have Intel style MTRRs, but |
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h index d61ea9db6cfe..289dfe6030e3 100644 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h | |||
@@ -84,7 +84,7 @@ void get_mtrr_state(void); | |||
84 | 84 | ||
85 | extern void set_mtrr_ops(struct mtrr_ops * ops); | 85 | extern void set_mtrr_ops(struct mtrr_ops * ops); |
86 | 86 | ||
87 | extern u32 size_or_mask, size_and_mask; | 87 | extern u64 size_or_mask, size_and_mask; |
88 | extern struct mtrr_ops * mtrr_if; | 88 | extern struct mtrr_ops * mtrr_if; |
89 | 89 | ||
90 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 90 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 6624d8583c42..47e3ebbfb28d 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -29,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
29 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 29 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
30 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | 30 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, |
31 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, | 31 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, |
32 | NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", | 32 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow", |
33 | 33 | ||
34 | /* Transmeta-defined */ | 34 | /* Transmeta-defined */ |
35 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | 35 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, |
@@ -47,7 +47,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
47 | /* Intel-defined (#2) */ | 47 | /* Intel-defined (#2) */ |
48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | 48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
49 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | 49 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
50 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, | 50 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", |
51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
52 | 52 | ||
53 | /* VIA/Cyrix/Centaur-defined */ | 53 | /* VIA/Cyrix/Centaur-defined */ |
@@ -57,8 +57,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
57 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 57 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
58 | 58 | ||
59 | /* AMD-defined (#2) */ | 59 | /* AMD-defined (#2) */ |
60 | "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL, | 60 | "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm", |
61 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 61 | "sse4a", "misalignsse", |
62 | "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, | ||
62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 64 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
64 | }; | 65 | }; |
@@ -69,8 +70,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
69 | "ttp", /* thermal trip */ | 70 | "ttp", /* thermal trip */ |
70 | "tm", | 71 | "tm", |
71 | "stc", | 72 | "stc", |
73 | "100mhzsteps", | ||
74 | "hwpstate", | ||
72 | NULL, | 75 | NULL, |
73 | /* nothing */ /* constant_tsc - moved to flags */ | 76 | NULL, /* constant_tsc - moved to flags */ |
77 | /* nothing */ | ||
74 | }; | 78 | }; |
75 | struct cpuinfo_x86 *c = v; | 79 | struct cpuinfo_x86 *c = v; |
76 | int i, n = c - cpu_data; | 80 | int i, n = c - cpu_data; |
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 4056fb7d2cdf..5678d46863c6 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -9,7 +9,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
9 | { | 9 | { |
10 | unsigned int cap_mask, uk, max, dummy; | 10 | unsigned int cap_mask, uk, max, dummy; |
11 | unsigned int cms_rev1, cms_rev2; | 11 | unsigned int cms_rev1, cms_rev2; |
12 | unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev; | 12 | unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; |
13 | char cpu_info[65]; | 13 | char cpu_info[65]; |
14 | 14 | ||
15 | get_model_name(c); /* Same as AMD/Cyrix */ | 15 | get_model_name(c); /* Same as AMD/Cyrix */ |
@@ -72,6 +72,9 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
72 | wrmsr(0x80860004, ~0, uk); | 72 | wrmsr(0x80860004, ~0, uk); |
73 | c->x86_capability[0] = cpuid_edx(0x00000001); | 73 | c->x86_capability[0] = cpuid_edx(0x00000001); |
74 | wrmsr(0x80860004, cap_mask, uk); | 74 | wrmsr(0x80860004, cap_mask, uk); |
75 | |||
76 | /* All Transmeta CPUs have a constant TSC */ | ||
77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | ||
75 | 78 | ||
76 | /* If we can run i686 user-space code, call us an i686 */ | 79 | /* If we can run i686 user-space code, call us an i686 */ |
77 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) | 80 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) |
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 4da75fa3208d..eeae0d992337 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c | |||
@@ -48,7 +48,6 @@ static struct class *cpuid_class; | |||
48 | #ifdef CONFIG_SMP | 48 | #ifdef CONFIG_SMP |
49 | 49 | ||
50 | struct cpuid_command { | 50 | struct cpuid_command { |
51 | int cpu; | ||
52 | u32 reg; | 51 | u32 reg; |
53 | u32 *data; | 52 | u32 *data; |
54 | }; | 53 | }; |
@@ -57,8 +56,7 @@ static void cpuid_smp_cpuid(void *cmd_block) | |||
57 | { | 56 | { |
58 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; | 57 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; |
59 | 58 | ||
60 | if (cmd->cpu == smp_processor_id()) | 59 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], |
61 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], | ||
62 | &cmd->data[3]); | 60 | &cmd->data[3]); |
63 | } | 61 | } |
64 | 62 | ||
@@ -70,11 +68,10 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data) | |||
70 | if (cpu == smp_processor_id()) { | 68 | if (cpu == smp_processor_id()) { |
71 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); | 69 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); |
72 | } else { | 70 | } else { |
73 | cmd.cpu = cpu; | ||
74 | cmd.reg = reg; | 71 | cmd.reg = reg; |
75 | cmd.data = data; | 72 | cmd.data = data; |
76 | 73 | ||
77 | smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); | 74 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); |
78 | } | 75 | } |
79 | preempt_enable(); | 76 | preempt_enable(); |
80 | } | 77 | } |
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index f391abcf7da9..70f39560846a 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | #include <asm/page.h> | 15 | #include <asm/page.h> |
16 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
17 | #include <asm/setup.h> | ||
17 | 18 | ||
18 | #ifdef CONFIG_EFI | 19 | #ifdef CONFIG_EFI |
19 | int efi_enabled = 0; | 20 | int efi_enabled = 0; |
@@ -156,21 +157,22 @@ static struct resource standard_io_resources[] = { { | |||
156 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | 157 | .flags = IORESOURCE_BUSY | IORESOURCE_IO |
157 | } }; | 158 | } }; |
158 | 159 | ||
159 | static int romsignature(const unsigned char *x) | 160 | #define ROMSIGNATURE 0xaa55 |
161 | |||
162 | static int __init romsignature(const unsigned char *rom) | ||
160 | { | 163 | { |
161 | unsigned short sig; | 164 | unsigned short sig; |
162 | int ret = 0; | 165 | |
163 | if (probe_kernel_address((const unsigned short *)x, sig) == 0) | 166 | return probe_kernel_address((const unsigned short *)rom, sig) == 0 && |
164 | ret = (sig == 0xaa55); | 167 | sig == ROMSIGNATURE; |
165 | return ret; | ||
166 | } | 168 | } |
167 | 169 | ||
168 | static int __init romchecksum(unsigned char *rom, unsigned long length) | 170 | static int __init romchecksum(unsigned char *rom, unsigned long length) |
169 | { | 171 | { |
170 | unsigned char *p, sum = 0; | 172 | unsigned char sum; |
171 | 173 | ||
172 | for (p = rom; p < rom + length; p++) | 174 | for (sum = 0; length; length--) |
173 | sum += *p; | 175 | sum += *rom++; |
174 | return sum == 0; | 176 | return sum == 0; |
175 | } | 177 | } |
176 | 178 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5e47683fc63a..18bddcb8e9e8 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -30,7 +30,7 @@ | |||
30 | * 18(%esp) - %eax | 30 | * 18(%esp) - %eax |
31 | * 1C(%esp) - %ds | 31 | * 1C(%esp) - %ds |
32 | * 20(%esp) - %es | 32 | * 20(%esp) - %es |
33 | * 24(%esp) - %gs | 33 | * 24(%esp) - %fs |
34 | * 28(%esp) - orig_eax | 34 | * 28(%esp) - orig_eax |
35 | * 2C(%esp) - %eip | 35 | * 2C(%esp) - %eip |
36 | * 30(%esp) - %cs | 36 | * 30(%esp) - %cs |
@@ -99,9 +99,9 @@ VM_MASK = 0x00020000 | |||
99 | 99 | ||
100 | #define SAVE_ALL \ | 100 | #define SAVE_ALL \ |
101 | cld; \ | 101 | cld; \ |
102 | pushl %gs; \ | 102 | pushl %fs; \ |
103 | CFI_ADJUST_CFA_OFFSET 4;\ | 103 | CFI_ADJUST_CFA_OFFSET 4;\ |
104 | /*CFI_REL_OFFSET gs, 0;*/\ | 104 | /*CFI_REL_OFFSET fs, 0;*/\ |
105 | pushl %es; \ | 105 | pushl %es; \ |
106 | CFI_ADJUST_CFA_OFFSET 4;\ | 106 | CFI_ADJUST_CFA_OFFSET 4;\ |
107 | /*CFI_REL_OFFSET es, 0;*/\ | 107 | /*CFI_REL_OFFSET es, 0;*/\ |
@@ -133,7 +133,7 @@ VM_MASK = 0x00020000 | |||
133 | movl %edx, %ds; \ | 133 | movl %edx, %ds; \ |
134 | movl %edx, %es; \ | 134 | movl %edx, %es; \ |
135 | movl $(__KERNEL_PDA), %edx; \ | 135 | movl $(__KERNEL_PDA), %edx; \ |
136 | movl %edx, %gs | 136 | movl %edx, %fs |
137 | 137 | ||
138 | #define RESTORE_INT_REGS \ | 138 | #define RESTORE_INT_REGS \ |
139 | popl %ebx; \ | 139 | popl %ebx; \ |
@@ -166,9 +166,9 @@ VM_MASK = 0x00020000 | |||
166 | 2: popl %es; \ | 166 | 2: popl %es; \ |
167 | CFI_ADJUST_CFA_OFFSET -4;\ | 167 | CFI_ADJUST_CFA_OFFSET -4;\ |
168 | /*CFI_RESTORE es;*/\ | 168 | /*CFI_RESTORE es;*/\ |
169 | 3: popl %gs; \ | 169 | 3: popl %fs; \ |
170 | CFI_ADJUST_CFA_OFFSET -4;\ | 170 | CFI_ADJUST_CFA_OFFSET -4;\ |
171 | /*CFI_RESTORE gs;*/\ | 171 | /*CFI_RESTORE fs;*/\ |
172 | .pushsection .fixup,"ax"; \ | 172 | .pushsection .fixup,"ax"; \ |
173 | 4: movl $0,(%esp); \ | 173 | 4: movl $0,(%esp); \ |
174 | jmp 1b; \ | 174 | jmp 1b; \ |
@@ -227,6 +227,7 @@ ENTRY(ret_from_fork) | |||
227 | CFI_ADJUST_CFA_OFFSET -4 | 227 | CFI_ADJUST_CFA_OFFSET -4 |
228 | jmp syscall_exit | 228 | jmp syscall_exit |
229 | CFI_ENDPROC | 229 | CFI_ENDPROC |
230 | END(ret_from_fork) | ||
230 | 231 | ||
231 | /* | 232 | /* |
232 | * Return to user mode is not as complex as all this looks, | 233 | * Return to user mode is not as complex as all this looks, |
@@ -258,6 +259,7 @@ ENTRY(resume_userspace) | |||
258 | # int/exception return? | 259 | # int/exception return? |
259 | jne work_pending | 260 | jne work_pending |
260 | jmp restore_all | 261 | jmp restore_all |
262 | END(ret_from_exception) | ||
261 | 263 | ||
262 | #ifdef CONFIG_PREEMPT | 264 | #ifdef CONFIG_PREEMPT |
263 | ENTRY(resume_kernel) | 265 | ENTRY(resume_kernel) |
@@ -272,6 +274,7 @@ need_resched: | |||
272 | jz restore_all | 274 | jz restore_all |
273 | call preempt_schedule_irq | 275 | call preempt_schedule_irq |
274 | jmp need_resched | 276 | jmp need_resched |
277 | END(resume_kernel) | ||
275 | #endif | 278 | #endif |
276 | CFI_ENDPROC | 279 | CFI_ENDPROC |
277 | 280 | ||
@@ -349,16 +352,17 @@ sysenter_past_esp: | |||
349 | movl PT_OLDESP(%esp), %ecx | 352 | movl PT_OLDESP(%esp), %ecx |
350 | xorl %ebp,%ebp | 353 | xorl %ebp,%ebp |
351 | TRACE_IRQS_ON | 354 | TRACE_IRQS_ON |
352 | 1: mov PT_GS(%esp), %gs | 355 | 1: mov PT_FS(%esp), %fs |
353 | ENABLE_INTERRUPTS_SYSEXIT | 356 | ENABLE_INTERRUPTS_SYSEXIT |
354 | CFI_ENDPROC | 357 | CFI_ENDPROC |
355 | .pushsection .fixup,"ax" | 358 | .pushsection .fixup,"ax" |
356 | 2: movl $0,PT_GS(%esp) | 359 | 2: movl $0,PT_FS(%esp) |
357 | jmp 1b | 360 | jmp 1b |
358 | .section __ex_table,"a" | 361 | .section __ex_table,"a" |
359 | .align 4 | 362 | .align 4 |
360 | .long 1b,2b | 363 | .long 1b,2b |
361 | .popsection | 364 | .popsection |
365 | ENDPROC(sysenter_entry) | ||
362 | 366 | ||
363 | # system call handler stub | 367 | # system call handler stub |
364 | ENTRY(system_call) | 368 | ENTRY(system_call) |
@@ -459,6 +463,7 @@ ldt_ss: | |||
459 | CFI_ADJUST_CFA_OFFSET -8 | 463 | CFI_ADJUST_CFA_OFFSET -8 |
460 | jmp restore_nocheck | 464 | jmp restore_nocheck |
461 | CFI_ENDPROC | 465 | CFI_ENDPROC |
466 | ENDPROC(system_call) | ||
462 | 467 | ||
463 | # perform work that needs to be done immediately before resumption | 468 | # perform work that needs to be done immediately before resumption |
464 | ALIGN | 469 | ALIGN |
@@ -504,6 +509,7 @@ work_notifysig_v86: | |||
504 | xorl %edx, %edx | 509 | xorl %edx, %edx |
505 | call do_notify_resume | 510 | call do_notify_resume |
506 | jmp resume_userspace_sig | 511 | jmp resume_userspace_sig |
512 | END(work_pending) | ||
507 | 513 | ||
508 | # perform syscall exit tracing | 514 | # perform syscall exit tracing |
509 | ALIGN | 515 | ALIGN |
@@ -519,6 +525,7 @@ syscall_trace_entry: | |||
519 | cmpl $(nr_syscalls), %eax | 525 | cmpl $(nr_syscalls), %eax |
520 | jnae syscall_call | 526 | jnae syscall_call |
521 | jmp syscall_exit | 527 | jmp syscall_exit |
528 | END(syscall_trace_entry) | ||
522 | 529 | ||
523 | # perform syscall exit tracing | 530 | # perform syscall exit tracing |
524 | ALIGN | 531 | ALIGN |
@@ -532,6 +539,7 @@ syscall_exit_work: | |||
532 | movl $1, %edx | 539 | movl $1, %edx |
533 | call do_syscall_trace | 540 | call do_syscall_trace |
534 | jmp resume_userspace | 541 | jmp resume_userspace |
542 | END(syscall_exit_work) | ||
535 | CFI_ENDPROC | 543 | CFI_ENDPROC |
536 | 544 | ||
537 | RING0_INT_FRAME # can't unwind into user space anyway | 545 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -542,15 +550,17 @@ syscall_fault: | |||
542 | GET_THREAD_INFO(%ebp) | 550 | GET_THREAD_INFO(%ebp) |
543 | movl $-EFAULT,PT_EAX(%esp) | 551 | movl $-EFAULT,PT_EAX(%esp) |
544 | jmp resume_userspace | 552 | jmp resume_userspace |
553 | END(syscall_fault) | ||
545 | 554 | ||
546 | syscall_badsys: | 555 | syscall_badsys: |
547 | movl $-ENOSYS,PT_EAX(%esp) | 556 | movl $-ENOSYS,PT_EAX(%esp) |
548 | jmp resume_userspace | 557 | jmp resume_userspace |
558 | END(syscall_badsys) | ||
549 | CFI_ENDPROC | 559 | CFI_ENDPROC |
550 | 560 | ||
551 | #define FIXUP_ESPFIX_STACK \ | 561 | #define FIXUP_ESPFIX_STACK \ |
552 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | 562 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
553 | movl %gs:PDA_cpu, %ebx; \ | 563 | movl %fs:PDA_cpu, %ebx; \ |
554 | PER_CPU(cpu_gdt_descr, %ebx); \ | 564 | PER_CPU(cpu_gdt_descr, %ebx); \ |
555 | movl GDS_address(%ebx), %ebx; \ | 565 | movl GDS_address(%ebx), %ebx; \ |
556 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 566 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
@@ -581,9 +591,9 @@ syscall_badsys: | |||
581 | ENTRY(interrupt) | 591 | ENTRY(interrupt) |
582 | .text | 592 | .text |
583 | 593 | ||
584 | vector=0 | ||
585 | ENTRY(irq_entries_start) | 594 | ENTRY(irq_entries_start) |
586 | RING0_INT_FRAME | 595 | RING0_INT_FRAME |
596 | vector=0 | ||
587 | .rept NR_IRQS | 597 | .rept NR_IRQS |
588 | ALIGN | 598 | ALIGN |
589 | .if vector | 599 | .if vector |
@@ -592,11 +602,16 @@ ENTRY(irq_entries_start) | |||
592 | 1: pushl $~(vector) | 602 | 1: pushl $~(vector) |
593 | CFI_ADJUST_CFA_OFFSET 4 | 603 | CFI_ADJUST_CFA_OFFSET 4 |
594 | jmp common_interrupt | 604 | jmp common_interrupt |
595 | .data | 605 | .previous |
596 | .long 1b | 606 | .long 1b |
597 | .text | 607 | .text |
598 | vector=vector+1 | 608 | vector=vector+1 |
599 | .endr | 609 | .endr |
610 | END(irq_entries_start) | ||
611 | |||
612 | .previous | ||
613 | END(interrupt) | ||
614 | .previous | ||
600 | 615 | ||
601 | /* | 616 | /* |
602 | * the CPU automatically disables interrupts when executing an IRQ vector, | 617 | * the CPU automatically disables interrupts when executing an IRQ vector, |
@@ -609,6 +624,7 @@ common_interrupt: | |||
609 | movl %esp,%eax | 624 | movl %esp,%eax |
610 | call do_IRQ | 625 | call do_IRQ |
611 | jmp ret_from_intr | 626 | jmp ret_from_intr |
627 | ENDPROC(common_interrupt) | ||
612 | CFI_ENDPROC | 628 | CFI_ENDPROC |
613 | 629 | ||
614 | #define BUILD_INTERRUPT(name, nr) \ | 630 | #define BUILD_INTERRUPT(name, nr) \ |
@@ -621,18 +637,24 @@ ENTRY(name) \ | |||
621 | movl %esp,%eax; \ | 637 | movl %esp,%eax; \ |
622 | call smp_/**/name; \ | 638 | call smp_/**/name; \ |
623 | jmp ret_from_intr; \ | 639 | jmp ret_from_intr; \ |
624 | CFI_ENDPROC | 640 | CFI_ENDPROC; \ |
641 | ENDPROC(name) | ||
625 | 642 | ||
626 | /* The include is where all of the SMP etc. interrupts come from */ | 643 | /* The include is where all of the SMP etc. interrupts come from */ |
627 | #include "entry_arch.h" | 644 | #include "entry_arch.h" |
628 | 645 | ||
646 | /* This alternate entry is needed because we hijack the apic LVTT */ | ||
647 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) | ||
648 | BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) | ||
649 | #endif | ||
650 | |||
629 | KPROBE_ENTRY(page_fault) | 651 | KPROBE_ENTRY(page_fault) |
630 | RING0_EC_FRAME | 652 | RING0_EC_FRAME |
631 | pushl $do_page_fault | 653 | pushl $do_page_fault |
632 | CFI_ADJUST_CFA_OFFSET 4 | 654 | CFI_ADJUST_CFA_OFFSET 4 |
633 | ALIGN | 655 | ALIGN |
634 | error_code: | 656 | error_code: |
635 | /* the function address is in %gs's slot on the stack */ | 657 | /* the function address is in %fs's slot on the stack */ |
636 | pushl %es | 658 | pushl %es |
637 | CFI_ADJUST_CFA_OFFSET 4 | 659 | CFI_ADJUST_CFA_OFFSET 4 |
638 | /*CFI_REL_OFFSET es, 0*/ | 660 | /*CFI_REL_OFFSET es, 0*/ |
@@ -661,20 +683,20 @@ error_code: | |||
661 | CFI_ADJUST_CFA_OFFSET 4 | 683 | CFI_ADJUST_CFA_OFFSET 4 |
662 | CFI_REL_OFFSET ebx, 0 | 684 | CFI_REL_OFFSET ebx, 0 |
663 | cld | 685 | cld |
664 | pushl %gs | 686 | pushl %fs |
665 | CFI_ADJUST_CFA_OFFSET 4 | 687 | CFI_ADJUST_CFA_OFFSET 4 |
666 | /*CFI_REL_OFFSET gs, 0*/ | 688 | /*CFI_REL_OFFSET fs, 0*/ |
667 | movl $(__KERNEL_PDA), %ecx | 689 | movl $(__KERNEL_PDA), %ecx |
668 | movl %ecx, %gs | 690 | movl %ecx, %fs |
669 | UNWIND_ESPFIX_STACK | 691 | UNWIND_ESPFIX_STACK |
670 | popl %ecx | 692 | popl %ecx |
671 | CFI_ADJUST_CFA_OFFSET -4 | 693 | CFI_ADJUST_CFA_OFFSET -4 |
672 | /*CFI_REGISTER es, ecx*/ | 694 | /*CFI_REGISTER es, ecx*/ |
673 | movl PT_GS(%esp), %edi # get the function address | 695 | movl PT_FS(%esp), %edi # get the function address |
674 | movl PT_ORIG_EAX(%esp), %edx # get the error code | 696 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
675 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | 697 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
676 | mov %ecx, PT_GS(%esp) | 698 | mov %ecx, PT_FS(%esp) |
677 | /*CFI_REL_OFFSET gs, ES*/ | 699 | /*CFI_REL_OFFSET fs, ES*/ |
678 | movl $(__USER_DS), %ecx | 700 | movl $(__USER_DS), %ecx |
679 | movl %ecx, %ds | 701 | movl %ecx, %ds |
680 | movl %ecx, %es | 702 | movl %ecx, %es |
@@ -692,6 +714,7 @@ ENTRY(coprocessor_error) | |||
692 | CFI_ADJUST_CFA_OFFSET 4 | 714 | CFI_ADJUST_CFA_OFFSET 4 |
693 | jmp error_code | 715 | jmp error_code |
694 | CFI_ENDPROC | 716 | CFI_ENDPROC |
717 | END(coprocessor_error) | ||
695 | 718 | ||
696 | ENTRY(simd_coprocessor_error) | 719 | ENTRY(simd_coprocessor_error) |
697 | RING0_INT_FRAME | 720 | RING0_INT_FRAME |
@@ -701,6 +724,7 @@ ENTRY(simd_coprocessor_error) | |||
701 | CFI_ADJUST_CFA_OFFSET 4 | 724 | CFI_ADJUST_CFA_OFFSET 4 |
702 | jmp error_code | 725 | jmp error_code |
703 | CFI_ENDPROC | 726 | CFI_ENDPROC |
727 | END(simd_coprocessor_error) | ||
704 | 728 | ||
705 | ENTRY(device_not_available) | 729 | ENTRY(device_not_available) |
706 | RING0_INT_FRAME | 730 | RING0_INT_FRAME |
@@ -721,6 +745,7 @@ device_not_available_emulate: | |||
721 | CFI_ADJUST_CFA_OFFSET -4 | 745 | CFI_ADJUST_CFA_OFFSET -4 |
722 | jmp ret_from_exception | 746 | jmp ret_from_exception |
723 | CFI_ENDPROC | 747 | CFI_ENDPROC |
748 | END(device_not_available) | ||
724 | 749 | ||
725 | /* | 750 | /* |
726 | * Debug traps and NMI can happen at the one SYSENTER instruction | 751 | * Debug traps and NMI can happen at the one SYSENTER instruction |
@@ -864,10 +889,12 @@ ENTRY(native_iret) | |||
864 | .align 4 | 889 | .align 4 |
865 | .long 1b,iret_exc | 890 | .long 1b,iret_exc |
866 | .previous | 891 | .previous |
892 | END(native_iret) | ||
867 | 893 | ||
868 | ENTRY(native_irq_enable_sysexit) | 894 | ENTRY(native_irq_enable_sysexit) |
869 | sti | 895 | sti |
870 | sysexit | 896 | sysexit |
897 | END(native_irq_enable_sysexit) | ||
871 | #endif | 898 | #endif |
872 | 899 | ||
873 | KPROBE_ENTRY(int3) | 900 | KPROBE_ENTRY(int3) |
@@ -890,6 +917,7 @@ ENTRY(overflow) | |||
890 | CFI_ADJUST_CFA_OFFSET 4 | 917 | CFI_ADJUST_CFA_OFFSET 4 |
891 | jmp error_code | 918 | jmp error_code |
892 | CFI_ENDPROC | 919 | CFI_ENDPROC |
920 | END(overflow) | ||
893 | 921 | ||
894 | ENTRY(bounds) | 922 | ENTRY(bounds) |
895 | RING0_INT_FRAME | 923 | RING0_INT_FRAME |
@@ -899,6 +927,7 @@ ENTRY(bounds) | |||
899 | CFI_ADJUST_CFA_OFFSET 4 | 927 | CFI_ADJUST_CFA_OFFSET 4 |
900 | jmp error_code | 928 | jmp error_code |
901 | CFI_ENDPROC | 929 | CFI_ENDPROC |
930 | END(bounds) | ||
902 | 931 | ||
903 | ENTRY(invalid_op) | 932 | ENTRY(invalid_op) |
904 | RING0_INT_FRAME | 933 | RING0_INT_FRAME |
@@ -908,6 +937,7 @@ ENTRY(invalid_op) | |||
908 | CFI_ADJUST_CFA_OFFSET 4 | 937 | CFI_ADJUST_CFA_OFFSET 4 |
909 | jmp error_code | 938 | jmp error_code |
910 | CFI_ENDPROC | 939 | CFI_ENDPROC |
940 | END(invalid_op) | ||
911 | 941 | ||
912 | ENTRY(coprocessor_segment_overrun) | 942 | ENTRY(coprocessor_segment_overrun) |
913 | RING0_INT_FRAME | 943 | RING0_INT_FRAME |
@@ -917,6 +947,7 @@ ENTRY(coprocessor_segment_overrun) | |||
917 | CFI_ADJUST_CFA_OFFSET 4 | 947 | CFI_ADJUST_CFA_OFFSET 4 |
918 | jmp error_code | 948 | jmp error_code |
919 | CFI_ENDPROC | 949 | CFI_ENDPROC |
950 | END(coprocessor_segment_overrun) | ||
920 | 951 | ||
921 | ENTRY(invalid_TSS) | 952 | ENTRY(invalid_TSS) |
922 | RING0_EC_FRAME | 953 | RING0_EC_FRAME |
@@ -924,6 +955,7 @@ ENTRY(invalid_TSS) | |||
924 | CFI_ADJUST_CFA_OFFSET 4 | 955 | CFI_ADJUST_CFA_OFFSET 4 |
925 | jmp error_code | 956 | jmp error_code |
926 | CFI_ENDPROC | 957 | CFI_ENDPROC |
958 | END(invalid_TSS) | ||
927 | 959 | ||
928 | ENTRY(segment_not_present) | 960 | ENTRY(segment_not_present) |
929 | RING0_EC_FRAME | 961 | RING0_EC_FRAME |
@@ -931,6 +963,7 @@ ENTRY(segment_not_present) | |||
931 | CFI_ADJUST_CFA_OFFSET 4 | 963 | CFI_ADJUST_CFA_OFFSET 4 |
932 | jmp error_code | 964 | jmp error_code |
933 | CFI_ENDPROC | 965 | CFI_ENDPROC |
966 | END(segment_not_present) | ||
934 | 967 | ||
935 | ENTRY(stack_segment) | 968 | ENTRY(stack_segment) |
936 | RING0_EC_FRAME | 969 | RING0_EC_FRAME |
@@ -938,6 +971,7 @@ ENTRY(stack_segment) | |||
938 | CFI_ADJUST_CFA_OFFSET 4 | 971 | CFI_ADJUST_CFA_OFFSET 4 |
939 | jmp error_code | 972 | jmp error_code |
940 | CFI_ENDPROC | 973 | CFI_ENDPROC |
974 | END(stack_segment) | ||
941 | 975 | ||
942 | KPROBE_ENTRY(general_protection) | 976 | KPROBE_ENTRY(general_protection) |
943 | RING0_EC_FRAME | 977 | RING0_EC_FRAME |
@@ -953,6 +987,7 @@ ENTRY(alignment_check) | |||
953 | CFI_ADJUST_CFA_OFFSET 4 | 987 | CFI_ADJUST_CFA_OFFSET 4 |
954 | jmp error_code | 988 | jmp error_code |
955 | CFI_ENDPROC | 989 | CFI_ENDPROC |
990 | END(alignment_check) | ||
956 | 991 | ||
957 | ENTRY(divide_error) | 992 | ENTRY(divide_error) |
958 | RING0_INT_FRAME | 993 | RING0_INT_FRAME |
@@ -962,6 +997,7 @@ ENTRY(divide_error) | |||
962 | CFI_ADJUST_CFA_OFFSET 4 | 997 | CFI_ADJUST_CFA_OFFSET 4 |
963 | jmp error_code | 998 | jmp error_code |
964 | CFI_ENDPROC | 999 | CFI_ENDPROC |
1000 | END(divide_error) | ||
965 | 1001 | ||
966 | #ifdef CONFIG_X86_MCE | 1002 | #ifdef CONFIG_X86_MCE |
967 | ENTRY(machine_check) | 1003 | ENTRY(machine_check) |
@@ -972,6 +1008,7 @@ ENTRY(machine_check) | |||
972 | CFI_ADJUST_CFA_OFFSET 4 | 1008 | CFI_ADJUST_CFA_OFFSET 4 |
973 | jmp error_code | 1009 | jmp error_code |
974 | CFI_ENDPROC | 1010 | CFI_ENDPROC |
1011 | END(machine_check) | ||
975 | #endif | 1012 | #endif |
976 | 1013 | ||
977 | ENTRY(spurious_interrupt_bug) | 1014 | ENTRY(spurious_interrupt_bug) |
@@ -982,6 +1019,7 @@ ENTRY(spurious_interrupt_bug) | |||
982 | CFI_ADJUST_CFA_OFFSET 4 | 1019 | CFI_ADJUST_CFA_OFFSET 4 |
983 | jmp error_code | 1020 | jmp error_code |
984 | CFI_ENDPROC | 1021 | CFI_ENDPROC |
1022 | END(spurious_interrupt_bug) | ||
985 | 1023 | ||
986 | ENTRY(kernel_thread_helper) | 1024 | ENTRY(kernel_thread_helper) |
987 | pushl $0 # fake return address for unwinder | 1025 | pushl $0 # fake return address for unwinder |
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index cb9abdfced9b..3fa7f9389afe 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -53,6 +53,7 @@ | |||
53 | * any particular GDT layout, because we load our own as soon as we | 53 | * any particular GDT layout, because we load our own as soon as we |
54 | * can. | 54 | * can. |
55 | */ | 55 | */ |
56 | .section .text.head,"ax",@progbits | ||
56 | ENTRY(startup_32) | 57 | ENTRY(startup_32) |
57 | 58 | ||
58 | #ifdef CONFIG_PARAVIRT | 59 | #ifdef CONFIG_PARAVIRT |
@@ -141,16 +142,25 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
141 | jb 10b | 142 | jb 10b |
142 | movl %edi,(init_pg_tables_end - __PAGE_OFFSET) | 143 | movl %edi,(init_pg_tables_end - __PAGE_OFFSET) |
143 | 144 | ||
144 | #ifdef CONFIG_SMP | ||
145 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ | 145 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ |
146 | jmp 3f | 146 | jmp 3f |
147 | |||
148 | /* | 147 | /* |
149 | * Non-boot CPU entry point; entered from trampoline.S | 148 | * Non-boot CPU entry point; entered from trampoline.S |
150 | * We can't lgdt here, because lgdt itself uses a data segment, but | 149 | * We can't lgdt here, because lgdt itself uses a data segment, but |
151 | * we know the trampoline has already loaded the boot_gdt_table GDT | 150 | * we know the trampoline has already loaded the boot_gdt_table GDT |
152 | * for us. | 151 | * for us. |
152 | * | ||
153 | * If cpu hotplug is not supported then this code can go in init section | ||
154 | * which will be freed later | ||
153 | */ | 155 | */ |
156 | |||
157 | #ifdef CONFIG_HOTPLUG_CPU | ||
158 | .section .text,"ax",@progbits | ||
159 | #else | ||
160 | .section .init.text,"ax",@progbits | ||
161 | #endif | ||
162 | |||
163 | #ifdef CONFIG_SMP | ||
154 | ENTRY(startup_32_smp) | 164 | ENTRY(startup_32_smp) |
155 | cld | 165 | cld |
156 | movl $(__BOOT_DS),%eax | 166 | movl $(__BOOT_DS),%eax |
@@ -208,8 +218,8 @@ ENTRY(startup_32_smp) | |||
208 | xorl %ebx,%ebx | 218 | xorl %ebx,%ebx |
209 | incl %ebx | 219 | incl %ebx |
210 | 220 | ||
211 | 3: | ||
212 | #endif /* CONFIG_SMP */ | 221 | #endif /* CONFIG_SMP */ |
222 | 3: | ||
213 | 223 | ||
214 | /* | 224 | /* |
215 | * Enable paging | 225 | * Enable paging |
@@ -309,7 +319,7 @@ is386: movl $2,%ecx # set MP | |||
309 | 319 | ||
310 | call check_x87 | 320 | call check_x87 |
311 | call setup_pda | 321 | call setup_pda |
312 | lgdt cpu_gdt_descr | 322 | lgdt early_gdt_descr |
313 | lidt idt_descr | 323 | lidt idt_descr |
314 | ljmp $(__KERNEL_CS),$1f | 324 | ljmp $(__KERNEL_CS),$1f |
315 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | 325 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers |
@@ -319,12 +329,12 @@ is386: movl $2,%ecx # set MP | |||
319 | movl %eax,%ds | 329 | movl %eax,%ds |
320 | movl %eax,%es | 330 | movl %eax,%es |
321 | 331 | ||
322 | xorl %eax,%eax # Clear FS and LDT | 332 | xorl %eax,%eax # Clear GS and LDT |
323 | movl %eax,%fs | 333 | movl %eax,%gs |
324 | lldt %ax | 334 | lldt %ax |
325 | 335 | ||
326 | movl $(__KERNEL_PDA),%eax | 336 | movl $(__KERNEL_PDA),%eax |
327 | mov %eax,%gs | 337 | mov %eax,%fs |
328 | 338 | ||
329 | cld # gcc2 wants the direction flag cleared at all times | 339 | cld # gcc2 wants the direction flag cleared at all times |
330 | pushl $0 # fake return address for unwinder | 340 | pushl $0 # fake return address for unwinder |
@@ -360,12 +370,12 @@ check_x87: | |||
360 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be | 370 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be |
361 | * that CPU's GDT and PDA. | 371 | * that CPU's GDT and PDA. |
362 | */ | 372 | */ |
363 | setup_pda: | 373 | ENTRY(setup_pda) |
364 | /* get the PDA pointer */ | 374 | /* get the PDA pointer */ |
365 | movl start_pda, %eax | 375 | movl start_pda, %eax |
366 | 376 | ||
367 | /* slot the PDA address into the GDT */ | 377 | /* slot the PDA address into the GDT */ |
368 | mov cpu_gdt_descr+2, %ecx | 378 | mov early_gdt_descr+2, %ecx |
369 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | 379 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ |
370 | shr $16, %eax | 380 | shr $16, %eax |
371 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | 381 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ |
@@ -492,6 +502,7 @@ ignore_int: | |||
492 | #endif | 502 | #endif |
493 | iret | 503 | iret |
494 | 504 | ||
505 | .section .text | ||
495 | #ifdef CONFIG_PARAVIRT | 506 | #ifdef CONFIG_PARAVIRT |
496 | startup_paravirt: | 507 | startup_paravirt: |
497 | cld | 508 | cld |
@@ -502,10 +513,11 @@ startup_paravirt: | |||
502 | pushl %ecx | 513 | pushl %ecx |
503 | pushl %eax | 514 | pushl %eax |
504 | 515 | ||
505 | /* paravirt.o is last in link, and that probe fn never returns */ | ||
506 | pushl $__start_paravirtprobe | 516 | pushl $__start_paravirtprobe |
507 | 1: | 517 | 1: |
508 | movl 0(%esp), %eax | 518 | movl 0(%esp), %eax |
519 | cmpl $__stop_paravirtprobe, %eax | ||
520 | je unhandled_paravirt | ||
509 | pushl (%eax) | 521 | pushl (%eax) |
510 | movl 8(%esp), %eax | 522 | movl 8(%esp), %eax |
511 | call *(%esp) | 523 | call *(%esp) |
@@ -517,6 +529,10 @@ startup_paravirt: | |||
517 | 529 | ||
518 | addl $4, (%esp) | 530 | addl $4, (%esp) |
519 | jmp 1b | 531 | jmp 1b |
532 | |||
533 | unhandled_paravirt: | ||
534 | /* Nothing wanted us: we're screwed. */ | ||
535 | ud2 | ||
520 | #endif | 536 | #endif |
521 | 537 | ||
522 | /* | 538 | /* |
@@ -581,7 +597,7 @@ idt_descr: | |||
581 | 597 | ||
582 | # boot GDT descriptor (later on used by CPU#0): | 598 | # boot GDT descriptor (later on used by CPU#0): |
583 | .word 0 # 32 bit align gdt_desc.address | 599 | .word 0 # 32 bit align gdt_desc.address |
584 | ENTRY(cpu_gdt_descr) | 600 | ENTRY(early_gdt_descr) |
585 | .word GDT_ENTRIES*8-1 | 601 | .word GDT_ENTRIES*8-1 |
586 | .long cpu_gdt_table | 602 | .long cpu_gdt_table |
587 | 603 | ||
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index ba8d302a0b72..e30ccedad0b9 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -1920,7 +1920,7 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1920 | static void __init setup_ioapic_ids_from_mpc(void) { } | 1920 | static void __init setup_ioapic_ids_from_mpc(void) { } |
1921 | #endif | 1921 | #endif |
1922 | 1922 | ||
1923 | static int no_timer_check __initdata; | 1923 | int no_timer_check __initdata; |
1924 | 1924 | ||
1925 | static int __init notimercheck(char *s) | 1925 | static int __init notimercheck(char *s) |
1926 | { | 1926 | { |
@@ -2310,7 +2310,7 @@ static inline void __init check_timer(void) | |||
2310 | 2310 | ||
2311 | disable_8259A_irq(0); | 2311 | disable_8259A_irq(0); |
2312 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, | 2312 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, |
2313 | "fasteio"); | 2313 | "fasteoi"); |
2314 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 2314 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
2315 | enable_8259A_irq(0); | 2315 | enable_8259A_irq(0); |
2316 | 2316 | ||
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 3201d421090a..5785d84103a6 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
20 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
21 | 21 | ||
22 | #include <asm/idle.h> | ||
23 | |||
22 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; | 24 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
23 | EXPORT_PER_CPU_SYMBOL(irq_stat); | 25 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
24 | 26 | ||
@@ -61,6 +63,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) | |||
61 | union irq_ctx *curctx, *irqctx; | 63 | union irq_ctx *curctx, *irqctx; |
62 | u32 *isp; | 64 | u32 *isp; |
63 | #endif | 65 | #endif |
66 | exit_idle(); | ||
64 | 67 | ||
65 | if (unlikely((unsigned)irq >= NR_IRQS)) { | 68 | if (unlikely((unsigned)irq >= NR_IRQS)) { |
66 | printk(KERN_EMERG "%s: cannot handle IRQ %d\n", | 69 | printk(KERN_EMERG "%s: cannot handle IRQ %d\n", |
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index af1d53344993..b545bc746fce 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c | |||
@@ -363,7 +363,7 @@ no_kprobe: | |||
363 | " pushf\n" | 363 | " pushf\n" |
364 | /* skip cs, eip, orig_eax */ | 364 | /* skip cs, eip, orig_eax */ |
365 | " subl $12, %esp\n" | 365 | " subl $12, %esp\n" |
366 | " pushl %gs\n" | 366 | " pushl %fs\n" |
367 | " pushl %ds\n" | 367 | " pushl %ds\n" |
368 | " pushl %es\n" | 368 | " pushl %es\n" |
369 | " pushl %eax\n" | 369 | " pushl %eax\n" |
@@ -387,7 +387,7 @@ no_kprobe: | |||
387 | " popl %edi\n" | 387 | " popl %edi\n" |
388 | " popl %ebp\n" | 388 | " popl %ebp\n" |
389 | " popl %eax\n" | 389 | " popl %eax\n" |
390 | /* skip eip, orig_eax, es, ds, gs */ | 390 | /* skip eip, orig_eax, es, ds, fs */ |
391 | " addl $20, %esp\n" | 391 | " addl $20, %esp\n" |
392 | " popf\n" | 392 | " popf\n" |
393 | " ret\n"); | 393 | " ret\n"); |
@@ -408,7 +408,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) | |||
408 | spin_lock_irqsave(&kretprobe_lock, flags); | 408 | spin_lock_irqsave(&kretprobe_lock, flags); |
409 | head = kretprobe_inst_table_head(current); | 409 | head = kretprobe_inst_table_head(current); |
410 | /* fixup registers */ | 410 | /* fixup registers */ |
411 | regs->xcs = __KERNEL_CS; | 411 | regs->xcs = __KERNEL_CS | get_kernel_rpl(); |
412 | regs->eip = trampoline_address; | 412 | regs->eip = trampoline_address; |
413 | regs->orig_eax = 0xffffffff; | 413 | regs->orig_eax = 0xffffffff; |
414 | 414 | ||
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 381252bae3d8..b8f16633a6ec 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c | |||
@@ -384,7 +384,7 @@ static int do_microcode_update (void) | |||
384 | { | 384 | { |
385 | long cursor = 0; | 385 | long cursor = 0; |
386 | int error = 0; | 386 | int error = 0; |
387 | void *new_mc; | 387 | void *new_mc = NULL; |
388 | int cpu; | 388 | int cpu; |
389 | cpumask_t old; | 389 | cpumask_t old; |
390 | 390 | ||
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 4e14264f392a..bcaa6e9b6197 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c | |||
@@ -68,7 +68,6 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) | |||
68 | #ifdef CONFIG_SMP | 68 | #ifdef CONFIG_SMP |
69 | 69 | ||
70 | struct msr_command { | 70 | struct msr_command { |
71 | int cpu; | ||
72 | int err; | 71 | int err; |
73 | u32 reg; | 72 | u32 reg; |
74 | u32 data[2]; | 73 | u32 data[2]; |
@@ -78,16 +77,14 @@ static void msr_smp_wrmsr(void *cmd_block) | |||
78 | { | 77 | { |
79 | struct msr_command *cmd = (struct msr_command *)cmd_block; | 78 | struct msr_command *cmd = (struct msr_command *)cmd_block; |
80 | 79 | ||
81 | if (cmd->cpu == smp_processor_id()) | 80 | cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); |
82 | cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); | ||
83 | } | 81 | } |
84 | 82 | ||
85 | static void msr_smp_rdmsr(void *cmd_block) | 83 | static void msr_smp_rdmsr(void *cmd_block) |
86 | { | 84 | { |
87 | struct msr_command *cmd = (struct msr_command *)cmd_block; | 85 | struct msr_command *cmd = (struct msr_command *)cmd_block; |
88 | 86 | ||
89 | if (cmd->cpu == smp_processor_id()) | 87 | cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); |
90 | cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); | ||
91 | } | 88 | } |
92 | 89 | ||
93 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | 90 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) |
@@ -99,12 +96,11 @@ static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | |||
99 | if (cpu == smp_processor_id()) { | 96 | if (cpu == smp_processor_id()) { |
100 | ret = wrmsr_eio(reg, eax, edx); | 97 | ret = wrmsr_eio(reg, eax, edx); |
101 | } else { | 98 | } else { |
102 | cmd.cpu = cpu; | ||
103 | cmd.reg = reg; | 99 | cmd.reg = reg; |
104 | cmd.data[0] = eax; | 100 | cmd.data[0] = eax; |
105 | cmd.data[1] = edx; | 101 | cmd.data[1] = edx; |
106 | 102 | ||
107 | smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); | 103 | smp_call_function_single(cpu, msr_smp_wrmsr, &cmd, 1, 1); |
108 | ret = cmd.err; | 104 | ret = cmd.err; |
109 | } | 105 | } |
110 | preempt_enable(); | 106 | preempt_enable(); |
@@ -120,10 +116,9 @@ static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) | |||
120 | if (cpu == smp_processor_id()) { | 116 | if (cpu == smp_processor_id()) { |
121 | ret = rdmsr_eio(reg, eax, edx); | 117 | ret = rdmsr_eio(reg, eax, edx); |
122 | } else { | 118 | } else { |
123 | cmd.cpu = cpu; | ||
124 | cmd.reg = reg; | 119 | cmd.reg = reg; |
125 | 120 | ||
126 | smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); | 121 | smp_call_function_single(cpu, msr_smp_rdmsr, &cmd, 1, 1); |
127 | 122 | ||
128 | *eax = cmd.data[0]; | 123 | *eax = cmd.data[0]; |
129 | *edx = cmd.data[1]; | 124 | *edx = cmd.data[1]; |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 1a6f8bb8881c..5d8a07c20281 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -185,7 +185,8 @@ static __cpuinit inline int nmi_known_cpu(void) | |||
185 | { | 185 | { |
186 | switch (boot_cpu_data.x86_vendor) { | 186 | switch (boot_cpu_data.x86_vendor) { |
187 | case X86_VENDOR_AMD: | 187 | case X86_VENDOR_AMD: |
188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | 188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6) |
189 | || (boot_cpu_data.x86 == 16)); | ||
189 | case X86_VENDOR_INTEL: | 190 | case X86_VENDOR_INTEL: |
190 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 191 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
191 | return 1; | 192 | return 1; |
@@ -216,6 +217,28 @@ static __init void nmi_cpu_busy(void *data) | |||
216 | } | 217 | } |
217 | #endif | 218 | #endif |
218 | 219 | ||
220 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
221 | { | ||
222 | u64 counter_val; | ||
223 | unsigned int retval = hz; | ||
224 | |||
225 | /* | ||
226 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
227 | * are writable, with higher bits sign extending from bit 31. | ||
228 | * So, we can only program the counter with 31 bit values and | ||
229 | * 32nd bit should be 1, for 33.. to be 1. | ||
230 | * Find the appropriate nmi_hz | ||
231 | */ | ||
232 | counter_val = (u64)cpu_khz * 1000; | ||
233 | do_div(counter_val, retval); | ||
234 | if (counter_val > 0x7fffffffULL) { | ||
235 | u64 count = (u64)cpu_khz * 1000; | ||
236 | do_div(count, 0x7fffffffUL); | ||
237 | retval = count + 1; | ||
238 | } | ||
239 | return retval; | ||
240 | } | ||
241 | |||
219 | static int __init check_nmi_watchdog(void) | 242 | static int __init check_nmi_watchdog(void) |
220 | { | 243 | { |
221 | unsigned int *prev_nmi_count; | 244 | unsigned int *prev_nmi_count; |
@@ -281,18 +304,10 @@ static int __init check_nmi_watchdog(void) | |||
281 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 304 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
282 | 305 | ||
283 | nmi_hz = 1; | 306 | nmi_hz = 1; |
284 | /* | 307 | |
285 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | 308 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || |
286 | * are writable, with higher bits sign extending from bit 31. | 309 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
287 | * So, we can only program the counter with 31 bit values and | 310 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
288 | * 32nd bit should be 1, for 33.. to be 1. | ||
289 | * Find the appropriate nmi_hz | ||
290 | */ | ||
291 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
292 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
293 | u64 count = (u64)cpu_khz * 1000; | ||
294 | do_div(count, 0x7fffffffUL); | ||
295 | nmi_hz = count + 1; | ||
296 | } | 311 | } |
297 | } | 312 | } |
298 | 313 | ||
@@ -369,6 +384,34 @@ void enable_timer_nmi_watchdog(void) | |||
369 | } | 384 | } |
370 | } | 385 | } |
371 | 386 | ||
387 | static void __acpi_nmi_disable(void *__unused) | ||
388 | { | ||
389 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * Disable timer based NMIs on all CPUs: | ||
394 | */ | ||
395 | void acpi_nmi_disable(void) | ||
396 | { | ||
397 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
398 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
399 | } | ||
400 | |||
401 | static void __acpi_nmi_enable(void *__unused) | ||
402 | { | ||
403 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Enable timer based NMIs on all CPUs: | ||
408 | */ | ||
409 | void acpi_nmi_enable(void) | ||
410 | { | ||
411 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
412 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
413 | } | ||
414 | |||
372 | #ifdef CONFIG_PM | 415 | #ifdef CONFIG_PM |
373 | 416 | ||
374 | static int nmi_pm_active; /* nmi_active before suspend */ | 417 | static int nmi_pm_active; /* nmi_active before suspend */ |
@@ -442,6 +485,17 @@ static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) | |||
442 | wrmsrl(perfctr_msr, 0 - count); | 485 | wrmsrl(perfctr_msr, 0 - count); |
443 | } | 486 | } |
444 | 487 | ||
488 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
489 | const char *descr) | ||
490 | { | ||
491 | u64 count = (u64)cpu_khz * 1000; | ||
492 | |||
493 | do_div(count, nmi_hz); | ||
494 | if(descr) | ||
495 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
496 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
497 | } | ||
498 | |||
445 | /* Note that these events don't tick when the CPU idles. This means | 499 | /* Note that these events don't tick when the CPU idles. This means |
446 | the frequency varies with CPU load. */ | 500 | the frequency varies with CPU load. */ |
447 | 501 | ||
@@ -531,7 +585,8 @@ static int setup_p6_watchdog(void) | |||
531 | 585 | ||
532 | /* setup the timer */ | 586 | /* setup the timer */ |
533 | wrmsr(evntsel_msr, evntsel, 0); | 587 | wrmsr(evntsel_msr, evntsel, 0); |
534 | write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); | 588 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
589 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0"); | ||
535 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 590 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
536 | evntsel |= P6_EVNTSEL0_ENABLE; | 591 | evntsel |= P6_EVNTSEL0_ENABLE; |
537 | wrmsr(evntsel_msr, evntsel, 0); | 592 | wrmsr(evntsel_msr, evntsel, 0); |
@@ -704,7 +759,8 @@ static int setup_intel_arch_watchdog(void) | |||
704 | 759 | ||
705 | /* setup the timer */ | 760 | /* setup the timer */ |
706 | wrmsr(evntsel_msr, evntsel, 0); | 761 | wrmsr(evntsel_msr, evntsel, 0); |
707 | write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | 762 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
763 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
708 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 764 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
709 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 765 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
710 | wrmsr(evntsel_msr, evntsel, 0); | 766 | wrmsr(evntsel_msr, evntsel, 0); |
@@ -762,7 +818,8 @@ void setup_apic_nmi_watchdog (void *unused) | |||
762 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 818 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
763 | switch (boot_cpu_data.x86_vendor) { | 819 | switch (boot_cpu_data.x86_vendor) { |
764 | case X86_VENDOR_AMD: | 820 | case X86_VENDOR_AMD: |
765 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | 821 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && |
822 | boot_cpu_data.x86 != 16) | ||
766 | return; | 823 | return; |
767 | if (!setup_k7_watchdog()) | 824 | if (!setup_k7_watchdog()) |
768 | return; | 825 | return; |
@@ -956,6 +1013,8 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
956 | dummy &= ~P4_CCCR_OVF; | 1013 | dummy &= ~P4_CCCR_OVF; |
957 | wrmsrl(wd->cccr_msr, dummy); | 1014 | wrmsrl(wd->cccr_msr, dummy); |
958 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1015 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1016 | /* start the cycle over again */ | ||
1017 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
959 | } | 1018 | } |
960 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | 1019 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || |
961 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | 1020 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
@@ -964,9 +1023,12 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
964 | * other P6 variant. | 1023 | * other P6 variant. |
965 | * ArchPerfom/Core Duo also needs this */ | 1024 | * ArchPerfom/Core Duo also needs this */ |
966 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1025 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1026 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
1027 | write_watchdog_counter32(wd->perfctr_msr, NULL); | ||
1028 | } else { | ||
1029 | /* start the cycle over again */ | ||
1030 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
967 | } | 1031 | } |
968 | /* start the cycle over again */ | ||
969 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
970 | rc = 1; | 1032 | rc = 1; |
971 | } else if (nmi_watchdog == NMI_IO_APIC) { | 1033 | } else if (nmi_watchdog == NMI_IO_APIC) { |
972 | /* don't know how to accurately check for this. | 1034 | /* don't know how to accurately check for this. |
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index e55fd05da0f5..c156ecfa3872 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -92,7 +92,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) | |||
92 | return insn_len; | 92 | return insn_len; |
93 | } | 93 | } |
94 | 94 | ||
95 | static fastcall unsigned long native_get_debugreg(int regno) | 95 | static unsigned long native_get_debugreg(int regno) |
96 | { | 96 | { |
97 | unsigned long val = 0; /* Damn you, gcc! */ | 97 | unsigned long val = 0; /* Damn you, gcc! */ |
98 | 98 | ||
@@ -115,7 +115,7 @@ static fastcall unsigned long native_get_debugreg(int regno) | |||
115 | return val; | 115 | return val; |
116 | } | 116 | } |
117 | 117 | ||
118 | static fastcall void native_set_debugreg(int regno, unsigned long value) | 118 | static void native_set_debugreg(int regno, unsigned long value) |
119 | { | 119 | { |
120 | switch (regno) { | 120 | switch (regno) { |
121 | case 0: | 121 | case 0: |
@@ -146,55 +146,55 @@ void init_IRQ(void) | |||
146 | paravirt_ops.init_IRQ(); | 146 | paravirt_ops.init_IRQ(); |
147 | } | 147 | } |
148 | 148 | ||
149 | static fastcall void native_clts(void) | 149 | static void native_clts(void) |
150 | { | 150 | { |
151 | asm volatile ("clts"); | 151 | asm volatile ("clts"); |
152 | } | 152 | } |
153 | 153 | ||
154 | static fastcall unsigned long native_read_cr0(void) | 154 | static unsigned long native_read_cr0(void) |
155 | { | 155 | { |
156 | unsigned long val; | 156 | unsigned long val; |
157 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); | 157 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); |
158 | return val; | 158 | return val; |
159 | } | 159 | } |
160 | 160 | ||
161 | static fastcall void native_write_cr0(unsigned long val) | 161 | static void native_write_cr0(unsigned long val) |
162 | { | 162 | { |
163 | asm volatile("movl %0,%%cr0": :"r" (val)); | 163 | asm volatile("movl %0,%%cr0": :"r" (val)); |
164 | } | 164 | } |
165 | 165 | ||
166 | static fastcall unsigned long native_read_cr2(void) | 166 | static unsigned long native_read_cr2(void) |
167 | { | 167 | { |
168 | unsigned long val; | 168 | unsigned long val; |
169 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); | 169 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); |
170 | return val; | 170 | return val; |
171 | } | 171 | } |
172 | 172 | ||
173 | static fastcall void native_write_cr2(unsigned long val) | 173 | static void native_write_cr2(unsigned long val) |
174 | { | 174 | { |
175 | asm volatile("movl %0,%%cr2": :"r" (val)); | 175 | asm volatile("movl %0,%%cr2": :"r" (val)); |
176 | } | 176 | } |
177 | 177 | ||
178 | static fastcall unsigned long native_read_cr3(void) | 178 | static unsigned long native_read_cr3(void) |
179 | { | 179 | { |
180 | unsigned long val; | 180 | unsigned long val; |
181 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); | 181 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); |
182 | return val; | 182 | return val; |
183 | } | 183 | } |
184 | 184 | ||
185 | static fastcall void native_write_cr3(unsigned long val) | 185 | static void native_write_cr3(unsigned long val) |
186 | { | 186 | { |
187 | asm volatile("movl %0,%%cr3": :"r" (val)); | 187 | asm volatile("movl %0,%%cr3": :"r" (val)); |
188 | } | 188 | } |
189 | 189 | ||
190 | static fastcall unsigned long native_read_cr4(void) | 190 | static unsigned long native_read_cr4(void) |
191 | { | 191 | { |
192 | unsigned long val; | 192 | unsigned long val; |
193 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); | 193 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); |
194 | return val; | 194 | return val; |
195 | } | 195 | } |
196 | 196 | ||
197 | static fastcall unsigned long native_read_cr4_safe(void) | 197 | static unsigned long native_read_cr4_safe(void) |
198 | { | 198 | { |
199 | unsigned long val; | 199 | unsigned long val; |
200 | /* This could fault if %cr4 does not exist */ | 200 | /* This could fault if %cr4 does not exist */ |
@@ -207,51 +207,51 @@ static fastcall unsigned long native_read_cr4_safe(void) | |||
207 | return val; | 207 | return val; |
208 | } | 208 | } |
209 | 209 | ||
210 | static fastcall void native_write_cr4(unsigned long val) | 210 | static void native_write_cr4(unsigned long val) |
211 | { | 211 | { |
212 | asm volatile("movl %0,%%cr4": :"r" (val)); | 212 | asm volatile("movl %0,%%cr4": :"r" (val)); |
213 | } | 213 | } |
214 | 214 | ||
215 | static fastcall unsigned long native_save_fl(void) | 215 | static unsigned long native_save_fl(void) |
216 | { | 216 | { |
217 | unsigned long f; | 217 | unsigned long f; |
218 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); | 218 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); |
219 | return f; | 219 | return f; |
220 | } | 220 | } |
221 | 221 | ||
222 | static fastcall void native_restore_fl(unsigned long f) | 222 | static void native_restore_fl(unsigned long f) |
223 | { | 223 | { |
224 | asm volatile("pushl %0 ; popfl": /* no output */ | 224 | asm volatile("pushl %0 ; popfl": /* no output */ |
225 | :"g" (f) | 225 | :"g" (f) |
226 | :"memory", "cc"); | 226 | :"memory", "cc"); |
227 | } | 227 | } |
228 | 228 | ||
229 | static fastcall void native_irq_disable(void) | 229 | static void native_irq_disable(void) |
230 | { | 230 | { |
231 | asm volatile("cli": : :"memory"); | 231 | asm volatile("cli": : :"memory"); |
232 | } | 232 | } |
233 | 233 | ||
234 | static fastcall void native_irq_enable(void) | 234 | static void native_irq_enable(void) |
235 | { | 235 | { |
236 | asm volatile("sti": : :"memory"); | 236 | asm volatile("sti": : :"memory"); |
237 | } | 237 | } |
238 | 238 | ||
239 | static fastcall void native_safe_halt(void) | 239 | static void native_safe_halt(void) |
240 | { | 240 | { |
241 | asm volatile("sti; hlt": : :"memory"); | 241 | asm volatile("sti; hlt": : :"memory"); |
242 | } | 242 | } |
243 | 243 | ||
244 | static fastcall void native_halt(void) | 244 | static void native_halt(void) |
245 | { | 245 | { |
246 | asm volatile("hlt": : :"memory"); | 246 | asm volatile("hlt": : :"memory"); |
247 | } | 247 | } |
248 | 248 | ||
249 | static fastcall void native_wbinvd(void) | 249 | static void native_wbinvd(void) |
250 | { | 250 | { |
251 | asm volatile("wbinvd": : :"memory"); | 251 | asm volatile("wbinvd": : :"memory"); |
252 | } | 252 | } |
253 | 253 | ||
254 | static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) | 254 | static unsigned long long native_read_msr(unsigned int msr, int *err) |
255 | { | 255 | { |
256 | unsigned long long val; | 256 | unsigned long long val; |
257 | 257 | ||
@@ -270,7 +270,7 @@ static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) | |||
270 | return val; | 270 | return val; |
271 | } | 271 | } |
272 | 272 | ||
273 | static fastcall int native_write_msr(unsigned int msr, unsigned long long val) | 273 | static int native_write_msr(unsigned int msr, unsigned long long val) |
274 | { | 274 | { |
275 | int err; | 275 | int err; |
276 | asm volatile("2: wrmsr ; xorl %0,%0\n" | 276 | asm volatile("2: wrmsr ; xorl %0,%0\n" |
@@ -288,53 +288,53 @@ static fastcall int native_write_msr(unsigned int msr, unsigned long long val) | |||
288 | return err; | 288 | return err; |
289 | } | 289 | } |
290 | 290 | ||
291 | static fastcall unsigned long long native_read_tsc(void) | 291 | static unsigned long long native_read_tsc(void) |
292 | { | 292 | { |
293 | unsigned long long val; | 293 | unsigned long long val; |
294 | asm volatile("rdtsc" : "=A" (val)); | 294 | asm volatile("rdtsc" : "=A" (val)); |
295 | return val; | 295 | return val; |
296 | } | 296 | } |
297 | 297 | ||
298 | static fastcall unsigned long long native_read_pmc(void) | 298 | static unsigned long long native_read_pmc(void) |
299 | { | 299 | { |
300 | unsigned long long val; | 300 | unsigned long long val; |
301 | asm volatile("rdpmc" : "=A" (val)); | 301 | asm volatile("rdpmc" : "=A" (val)); |
302 | return val; | 302 | return val; |
303 | } | 303 | } |
304 | 304 | ||
305 | static fastcall void native_load_tr_desc(void) | 305 | static void native_load_tr_desc(void) |
306 | { | 306 | { |
307 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | 307 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); |
308 | } | 308 | } |
309 | 309 | ||
310 | static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) | 310 | static void native_load_gdt(const struct Xgt_desc_struct *dtr) |
311 | { | 311 | { |
312 | asm volatile("lgdt %0"::"m" (*dtr)); | 312 | asm volatile("lgdt %0"::"m" (*dtr)); |
313 | } | 313 | } |
314 | 314 | ||
315 | static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) | 315 | static void native_load_idt(const struct Xgt_desc_struct *dtr) |
316 | { | 316 | { |
317 | asm volatile("lidt %0"::"m" (*dtr)); | 317 | asm volatile("lidt %0"::"m" (*dtr)); |
318 | } | 318 | } |
319 | 319 | ||
320 | static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) | 320 | static void native_store_gdt(struct Xgt_desc_struct *dtr) |
321 | { | 321 | { |
322 | asm ("sgdt %0":"=m" (*dtr)); | 322 | asm ("sgdt %0":"=m" (*dtr)); |
323 | } | 323 | } |
324 | 324 | ||
325 | static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) | 325 | static void native_store_idt(struct Xgt_desc_struct *dtr) |
326 | { | 326 | { |
327 | asm ("sidt %0":"=m" (*dtr)); | 327 | asm ("sidt %0":"=m" (*dtr)); |
328 | } | 328 | } |
329 | 329 | ||
330 | static fastcall unsigned long native_store_tr(void) | 330 | static unsigned long native_store_tr(void) |
331 | { | 331 | { |
332 | unsigned long tr; | 332 | unsigned long tr; |
333 | asm ("str %0":"=r" (tr)); | 333 | asm ("str %0":"=r" (tr)); |
334 | return tr; | 334 | return tr; |
335 | } | 335 | } |
336 | 336 | ||
337 | static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) | 337 | static void native_load_tls(struct thread_struct *t, unsigned int cpu) |
338 | { | 338 | { |
339 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] | 339 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] |
340 | C(0); C(1); C(2); | 340 | C(0); C(1); C(2); |
@@ -348,22 +348,22 @@ static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 | |||
348 | lp[1] = entry_high; | 348 | lp[1] = entry_high; |
349 | } | 349 | } |
350 | 350 | ||
351 | static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) | 351 | static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) |
352 | { | 352 | { |
353 | native_write_dt_entry(dt, entrynum, low, high); | 353 | native_write_dt_entry(dt, entrynum, low, high); |
354 | } | 354 | } |
355 | 355 | ||
356 | static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) | 356 | static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) |
357 | { | 357 | { |
358 | native_write_dt_entry(dt, entrynum, low, high); | 358 | native_write_dt_entry(dt, entrynum, low, high); |
359 | } | 359 | } |
360 | 360 | ||
361 | static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) | 361 | static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) |
362 | { | 362 | { |
363 | native_write_dt_entry(dt, entrynum, low, high); | 363 | native_write_dt_entry(dt, entrynum, low, high); |
364 | } | 364 | } |
365 | 365 | ||
366 | static fastcall void native_load_esp0(struct tss_struct *tss, | 366 | static void native_load_esp0(struct tss_struct *tss, |
367 | struct thread_struct *thread) | 367 | struct thread_struct *thread) |
368 | { | 368 | { |
369 | tss->esp0 = thread->esp0; | 369 | tss->esp0 = thread->esp0; |
@@ -375,12 +375,12 @@ static fastcall void native_load_esp0(struct tss_struct *tss, | |||
375 | } | 375 | } |
376 | } | 376 | } |
377 | 377 | ||
378 | static fastcall void native_io_delay(void) | 378 | static void native_io_delay(void) |
379 | { | 379 | { |
380 | asm volatile("outb %al,$0x80"); | 380 | asm volatile("outb %al,$0x80"); |
381 | } | 381 | } |
382 | 382 | ||
383 | static fastcall void native_flush_tlb(void) | 383 | static void native_flush_tlb(void) |
384 | { | 384 | { |
385 | __native_flush_tlb(); | 385 | __native_flush_tlb(); |
386 | } | 386 | } |
@@ -389,49 +389,49 @@ static fastcall void native_flush_tlb(void) | |||
389 | * Global pages have to be flushed a bit differently. Not a real | 389 | * Global pages have to be flushed a bit differently. Not a real |
390 | * performance problem because this does not happen often. | 390 | * performance problem because this does not happen often. |
391 | */ | 391 | */ |
392 | static fastcall void native_flush_tlb_global(void) | 392 | static void native_flush_tlb_global(void) |
393 | { | 393 | { |
394 | __native_flush_tlb_global(); | 394 | __native_flush_tlb_global(); |
395 | } | 395 | } |
396 | 396 | ||
397 | static fastcall void native_flush_tlb_single(u32 addr) | 397 | static void native_flush_tlb_single(u32 addr) |
398 | { | 398 | { |
399 | __native_flush_tlb_single(addr); | 399 | __native_flush_tlb_single(addr); |
400 | } | 400 | } |
401 | 401 | ||
402 | #ifndef CONFIG_X86_PAE | 402 | #ifndef CONFIG_X86_PAE |
403 | static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) | 403 | static void native_set_pte(pte_t *ptep, pte_t pteval) |
404 | { | 404 | { |
405 | *ptep = pteval; | 405 | *ptep = pteval; |
406 | } | 406 | } |
407 | 407 | ||
408 | static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) | 408 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) |
409 | { | 409 | { |
410 | *ptep = pteval; | 410 | *ptep = pteval; |
411 | } | 411 | } |
412 | 412 | ||
413 | static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 413 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
414 | { | 414 | { |
415 | *pmdp = pmdval; | 415 | *pmdp = pmdval; |
416 | } | 416 | } |
417 | 417 | ||
418 | #else /* CONFIG_X86_PAE */ | 418 | #else /* CONFIG_X86_PAE */ |
419 | 419 | ||
420 | static fastcall void native_set_pte(pte_t *ptep, pte_t pte) | 420 | static void native_set_pte(pte_t *ptep, pte_t pte) |
421 | { | 421 | { |
422 | ptep->pte_high = pte.pte_high; | 422 | ptep->pte_high = pte.pte_high; |
423 | smp_wmb(); | 423 | smp_wmb(); |
424 | ptep->pte_low = pte.pte_low; | 424 | ptep->pte_low = pte.pte_low; |
425 | } | 425 | } |
426 | 426 | ||
427 | static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | 427 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) |
428 | { | 428 | { |
429 | ptep->pte_high = pte.pte_high; | 429 | ptep->pte_high = pte.pte_high; |
430 | smp_wmb(); | 430 | smp_wmb(); |
431 | ptep->pte_low = pte.pte_low; | 431 | ptep->pte_low = pte.pte_low; |
432 | } | 432 | } |
433 | 433 | ||
434 | static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 434 | static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
435 | { | 435 | { |
436 | ptep->pte_low = 0; | 436 | ptep->pte_low = 0; |
437 | smp_wmb(); | 437 | smp_wmb(); |
@@ -440,29 +440,29 @@ static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long | |||
440 | ptep->pte_low = pte.pte_low; | 440 | ptep->pte_low = pte.pte_low; |
441 | } | 441 | } |
442 | 442 | ||
443 | static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) | 443 | static void native_set_pte_atomic(pte_t *ptep, pte_t pteval) |
444 | { | 444 | { |
445 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | 445 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); |
446 | } | 446 | } |
447 | 447 | ||
448 | static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 448 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
449 | { | 449 | { |
450 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); | 450 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); |
451 | } | 451 | } |
452 | 452 | ||
453 | static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) | 453 | static void native_set_pud(pud_t *pudp, pud_t pudval) |
454 | { | 454 | { |
455 | *pudp = pudval; | 455 | *pudp = pudval; |
456 | } | 456 | } |
457 | 457 | ||
458 | static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 458 | static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
459 | { | 459 | { |
460 | ptep->pte_low = 0; | 460 | ptep->pte_low = 0; |
461 | smp_wmb(); | 461 | smp_wmb(); |
462 | ptep->pte_high = 0; | 462 | ptep->pte_high = 0; |
463 | } | 463 | } |
464 | 464 | ||
465 | static fastcall void native_pmd_clear(pmd_t *pmd) | 465 | static void native_pmd_clear(pmd_t *pmd) |
466 | { | 466 | { |
467 | u32 *tmp = (u32 *)pmd; | 467 | u32 *tmp = (u32 *)pmd; |
468 | *tmp = 0; | 468 | *tmp = 0; |
@@ -472,8 +472,8 @@ static fastcall void native_pmd_clear(pmd_t *pmd) | |||
472 | #endif /* CONFIG_X86_PAE */ | 472 | #endif /* CONFIG_X86_PAE */ |
473 | 473 | ||
474 | /* These are in entry.S */ | 474 | /* These are in entry.S */ |
475 | extern fastcall void native_iret(void); | 475 | extern void native_iret(void); |
476 | extern fastcall void native_irq_enable_sysexit(void); | 476 | extern void native_irq_enable_sysexit(void); |
477 | 477 | ||
478 | static int __init print_banner(void) | 478 | static int __init print_banner(void) |
479 | { | 479 | { |
@@ -482,9 +482,6 @@ static int __init print_banner(void) | |||
482 | } | 482 | } |
483 | core_initcall(print_banner); | 483 | core_initcall(print_banner); |
484 | 484 | ||
485 | /* We simply declare start_kernel to be the paravirt probe of last resort. */ | ||
486 | paravirt_probe(start_kernel); | ||
487 | |||
488 | struct paravirt_ops paravirt_ops = { | 485 | struct paravirt_ops paravirt_ops = { |
489 | .name = "bare hardware", | 486 | .name = "bare hardware", |
490 | .paravirt_enabled = 0, | 487 | .paravirt_enabled = 0, |
@@ -544,12 +541,21 @@ struct paravirt_ops paravirt_ops = { | |||
544 | .apic_write = native_apic_write, | 541 | .apic_write = native_apic_write, |
545 | .apic_write_atomic = native_apic_write_atomic, | 542 | .apic_write_atomic = native_apic_write_atomic, |
546 | .apic_read = native_apic_read, | 543 | .apic_read = native_apic_read, |
544 | .setup_boot_clock = setup_boot_APIC_clock, | ||
545 | .setup_secondary_clock = setup_secondary_APIC_clock, | ||
547 | #endif | 546 | #endif |
547 | .set_lazy_mode = (void *)native_nop, | ||
548 | 548 | ||
549 | .flush_tlb_user = native_flush_tlb, | 549 | .flush_tlb_user = native_flush_tlb, |
550 | .flush_tlb_kernel = native_flush_tlb_global, | 550 | .flush_tlb_kernel = native_flush_tlb_global, |
551 | .flush_tlb_single = native_flush_tlb_single, | 551 | .flush_tlb_single = native_flush_tlb_single, |
552 | 552 | ||
553 | .alloc_pt = (void *)native_nop, | ||
554 | .alloc_pd = (void *)native_nop, | ||
555 | .alloc_pd_clone = (void *)native_nop, | ||
556 | .release_pt = (void *)native_nop, | ||
557 | .release_pd = (void *)native_nop, | ||
558 | |||
553 | .set_pte = native_set_pte, | 559 | .set_pte = native_set_pte, |
554 | .set_pte_at = native_set_pte_at, | 560 | .set_pte_at = native_set_pte_at, |
555 | .set_pmd = native_set_pmd, | 561 | .set_pmd = native_set_pmd, |
@@ -565,6 +571,8 @@ struct paravirt_ops paravirt_ops = { | |||
565 | 571 | ||
566 | .irq_enable_sysexit = native_irq_enable_sysexit, | 572 | .irq_enable_sysexit = native_irq_enable_sysexit, |
567 | .iret = native_iret, | 573 | .iret = native_iret, |
574 | |||
575 | .startup_ipi_hook = (void *)native_nop, | ||
568 | }; | 576 | }; |
569 | 577 | ||
570 | /* | 578 | /* |
diff --git a/arch/i386/kernel/pcspeaker.c b/arch/i386/kernel/pcspeaker.c new file mode 100644 index 000000000000..bc1f2d3ea277 --- /dev/null +++ b/arch/i386/kernel/pcspeaker.c | |||
@@ -0,0 +1,20 @@ | |||
1 | #include <linux/platform_device.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/init.h> | ||
4 | |||
5 | static __init int add_pcspkr(void) | ||
6 | { | ||
7 | struct platform_device *pd; | ||
8 | int ret; | ||
9 | |||
10 | pd = platform_device_alloc("pcspkr", -1); | ||
11 | if (!pd) | ||
12 | return -ENOMEM; | ||
13 | |||
14 | ret = platform_device_add(pd); | ||
15 | if (ret) | ||
16 | platform_device_put(pd); | ||
17 | |||
18 | return ret; | ||
19 | } | ||
20 | device_initcall(add_pcspkr); | ||
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c641056233a6..7845d480c293 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/i387.h> | 48 | #include <asm/i387.h> |
49 | #include <asm/desc.h> | 49 | #include <asm/desc.h> |
50 | #include <asm/vm86.h> | 50 | #include <asm/vm86.h> |
51 | #include <asm/idle.h> | ||
51 | #ifdef CONFIG_MATH_EMULATION | 52 | #ifdef CONFIG_MATH_EMULATION |
52 | #include <asm/math_emu.h> | 53 | #include <asm/math_emu.h> |
53 | #endif | 54 | #endif |
@@ -80,6 +81,42 @@ void (*pm_idle)(void); | |||
80 | EXPORT_SYMBOL(pm_idle); | 81 | EXPORT_SYMBOL(pm_idle); |
81 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | 82 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); |
82 | 83 | ||
84 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | ||
85 | |||
86 | void idle_notifier_register(struct notifier_block *n) | ||
87 | { | ||
88 | atomic_notifier_chain_register(&idle_notifier, n); | ||
89 | } | ||
90 | |||
91 | void idle_notifier_unregister(struct notifier_block *n) | ||
92 | { | ||
93 | atomic_notifier_chain_unregister(&idle_notifier, n); | ||
94 | } | ||
95 | |||
96 | static DEFINE_PER_CPU(volatile unsigned long, idle_state); | ||
97 | |||
98 | void enter_idle(void) | ||
99 | { | ||
100 | /* needs to be atomic w.r.t. interrupts, not against other CPUs */ | ||
101 | __set_bit(0, &__get_cpu_var(idle_state)); | ||
102 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); | ||
103 | } | ||
104 | |||
105 | static void __exit_idle(void) | ||
106 | { | ||
107 | /* needs to be atomic w.r.t. interrupts, not against other CPUs */ | ||
108 | if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0) | ||
109 | return; | ||
110 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); | ||
111 | } | ||
112 | |||
113 | void exit_idle(void) | ||
114 | { | ||
115 | if (current->pid) | ||
116 | return; | ||
117 | __exit_idle(); | ||
118 | } | ||
119 | |||
83 | void disable_hlt(void) | 120 | void disable_hlt(void) |
84 | { | 121 | { |
85 | hlt_counter++; | 122 | hlt_counter++; |
@@ -130,6 +167,7 @@ EXPORT_SYMBOL(default_idle); | |||
130 | */ | 167 | */ |
131 | static void poll_idle (void) | 168 | static void poll_idle (void) |
132 | { | 169 | { |
170 | local_irq_enable(); | ||
133 | cpu_relax(); | 171 | cpu_relax(); |
134 | } | 172 | } |
135 | 173 | ||
@@ -189,7 +227,16 @@ void cpu_idle(void) | |||
189 | play_dead(); | 227 | play_dead(); |
190 | 228 | ||
191 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | 229 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; |
230 | |||
231 | /* | ||
232 | * Idle routines should keep interrupts disabled | ||
233 | * from here on, until they go to idle. | ||
234 | * Otherwise, idle callbacks can misfire. | ||
235 | */ | ||
236 | local_irq_disable(); | ||
237 | enter_idle(); | ||
192 | idle(); | 238 | idle(); |
239 | __exit_idle(); | ||
193 | } | 240 | } |
194 | preempt_enable_no_resched(); | 241 | preempt_enable_no_resched(); |
195 | schedule(); | 242 | schedule(); |
@@ -243,7 +290,11 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) | |||
243 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 290 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
244 | smp_mb(); | 291 | smp_mb(); |
245 | if (!need_resched()) | 292 | if (!need_resched()) |
246 | __mwait(eax, ecx); | 293 | __sti_mwait(eax, ecx); |
294 | else | ||
295 | local_irq_enable(); | ||
296 | } else { | ||
297 | local_irq_enable(); | ||
247 | } | 298 | } |
248 | } | 299 | } |
249 | 300 | ||
@@ -308,8 +359,8 @@ void show_regs(struct pt_regs * regs) | |||
308 | regs->eax,regs->ebx,regs->ecx,regs->edx); | 359 | regs->eax,regs->ebx,regs->ecx,regs->edx); |
309 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", | 360 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", |
310 | regs->esi, regs->edi, regs->ebp); | 361 | regs->esi, regs->edi, regs->ebp); |
311 | printk(" DS: %04x ES: %04x GS: %04x\n", | 362 | printk(" DS: %04x ES: %04x FS: %04x\n", |
312 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); | 363 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); |
313 | 364 | ||
314 | cr0 = read_cr0(); | 365 | cr0 = read_cr0(); |
315 | cr2 = read_cr2(); | 366 | cr2 = read_cr2(); |
@@ -340,7 +391,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
340 | 391 | ||
341 | regs.xds = __USER_DS; | 392 | regs.xds = __USER_DS; |
342 | regs.xes = __USER_DS; | 393 | regs.xes = __USER_DS; |
343 | regs.xgs = __KERNEL_PDA; | 394 | regs.xfs = __KERNEL_PDA; |
344 | regs.orig_eax = -1; | 395 | regs.orig_eax = -1; |
345 | regs.eip = (unsigned long) kernel_thread_helper; | 396 | regs.eip = (unsigned long) kernel_thread_helper; |
346 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | 397 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
@@ -425,7 +476,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |||
425 | 476 | ||
426 | p->thread.eip = (unsigned long) ret_from_fork; | 477 | p->thread.eip = (unsigned long) ret_from_fork; |
427 | 478 | ||
428 | savesegment(fs,p->thread.fs); | 479 | savesegment(gs,p->thread.gs); |
429 | 480 | ||
430 | tsk = current; | 481 | tsk = current; |
431 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 482 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
@@ -501,8 +552,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump) | |||
501 | dump->regs.eax = regs->eax; | 552 | dump->regs.eax = regs->eax; |
502 | dump->regs.ds = regs->xds; | 553 | dump->regs.ds = regs->xds; |
503 | dump->regs.es = regs->xes; | 554 | dump->regs.es = regs->xes; |
504 | savesegment(fs,dump->regs.fs); | 555 | dump->regs.fs = regs->xfs; |
505 | dump->regs.gs = regs->xgs; | 556 | savesegment(gs,dump->regs.gs); |
506 | dump->regs.orig_eax = regs->orig_eax; | 557 | dump->regs.orig_eax = regs->orig_eax; |
507 | dump->regs.eip = regs->eip; | 558 | dump->regs.eip = regs->eip; |
508 | dump->regs.cs = regs->xcs; | 559 | dump->regs.cs = regs->xcs; |
@@ -653,7 +704,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
653 | load_esp0(tss, next); | 704 | load_esp0(tss, next); |
654 | 705 | ||
655 | /* | 706 | /* |
656 | * Save away %fs. No need to save %gs, as it was saved on the | 707 | * Save away %gs. No need to save %fs, as it was saved on the |
657 | * stack on entry. No need to save %es and %ds, as those are | 708 | * stack on entry. No need to save %es and %ds, as those are |
658 | * always kernel segments while inside the kernel. Doing this | 709 | * always kernel segments while inside the kernel. Doing this |
659 | * before setting the new TLS descriptors avoids the situation | 710 | * before setting the new TLS descriptors avoids the situation |
@@ -662,7 +713,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
662 | * used %fs or %gs (it does not today), or if the kernel is | 713 | * used %fs or %gs (it does not today), or if the kernel is |
663 | * running inside of a hypervisor layer. | 714 | * running inside of a hypervisor layer. |
664 | */ | 715 | */ |
665 | savesegment(fs, prev->fs); | 716 | savesegment(gs, prev->gs); |
666 | 717 | ||
667 | /* | 718 | /* |
668 | * Load the per-thread Thread-Local Storage descriptor. | 719 | * Load the per-thread Thread-Local Storage descriptor. |
@@ -670,14 +721,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
670 | load_TLS(next, cpu); | 721 | load_TLS(next, cpu); |
671 | 722 | ||
672 | /* | 723 | /* |
673 | * Restore %fs if needed. | 724 | * Restore IOPL if needed. In normal use, the flags restore |
674 | * | 725 | * in the switch assembly will handle this. But if the kernel |
675 | * Glibc normally makes %fs be zero. | 726 | * is running virtualized at a non-zero CPL, the popf will |
727 | * not restore flags, so it must be done in a separate step. | ||
676 | */ | 728 | */ |
677 | if (unlikely(prev->fs | next->fs)) | 729 | if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) |
678 | loadsegment(fs, next->fs); | 730 | set_iopl_mask(next->iopl); |
679 | |||
680 | write_pda(pcurrent, next_p); | ||
681 | 731 | ||
682 | /* | 732 | /* |
683 | * Now maybe handle debug registers and/or IO bitmaps | 733 | * Now maybe handle debug registers and/or IO bitmaps |
@@ -688,6 +738,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
688 | 738 | ||
689 | disable_tsc(prev_p, next_p); | 739 | disable_tsc(prev_p, next_p); |
690 | 740 | ||
741 | /* | ||
742 | * Leave lazy mode, flushing any hypercalls made here. | ||
743 | * This must be done before restoring TLS segments so | ||
744 | * the GDT and LDT are properly updated, and must be | ||
745 | * done before math_state_restore, so the TS bit is up | ||
746 | * to date. | ||
747 | */ | ||
748 | arch_leave_lazy_cpu_mode(); | ||
749 | |||
691 | /* If the task has used fpu the last 5 timeslices, just do a full | 750 | /* If the task has used fpu the last 5 timeslices, just do a full |
692 | * restore of the math state immediately to avoid the trap; the | 751 | * restore of the math state immediately to avoid the trap; the |
693 | * chances of needing FPU soon are obviously high now | 752 | * chances of needing FPU soon are obviously high now |
@@ -695,6 +754,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
695 | if (next_p->fpu_counter > 5) | 754 | if (next_p->fpu_counter > 5) |
696 | math_state_restore(); | 755 | math_state_restore(); |
697 | 756 | ||
757 | /* | ||
758 | * Restore %gs if needed (which is common) | ||
759 | */ | ||
760 | if (prev->gs | next->gs) | ||
761 | loadsegment(gs, next->gs); | ||
762 | |||
763 | write_pda(pcurrent, next_p); | ||
764 | |||
698 | return prev_p; | 765 | return prev_p; |
699 | } | 766 | } |
700 | 767 | ||
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index af8aabe85800..4a8f8a259723 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c | |||
@@ -89,14 +89,14 @@ static int putreg(struct task_struct *child, | |||
89 | unsigned long regno, unsigned long value) | 89 | unsigned long regno, unsigned long value) |
90 | { | 90 | { |
91 | switch (regno >> 2) { | 91 | switch (regno >> 2) { |
92 | case FS: | 92 | case GS: |
93 | if (value && (value & 3) != 3) | 93 | if (value && (value & 3) != 3) |
94 | return -EIO; | 94 | return -EIO; |
95 | child->thread.fs = value; | 95 | child->thread.gs = value; |
96 | return 0; | 96 | return 0; |
97 | case DS: | 97 | case DS: |
98 | case ES: | 98 | case ES: |
99 | case GS: | 99 | case FS: |
100 | if (value && (value & 3) != 3) | 100 | if (value && (value & 3) != 3) |
101 | return -EIO; | 101 | return -EIO; |
102 | value &= 0xffff; | 102 | value &= 0xffff; |
@@ -112,7 +112,7 @@ static int putreg(struct task_struct *child, | |||
112 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; | 112 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; |
113 | break; | 113 | break; |
114 | } | 114 | } |
115 | if (regno > ES*4) | 115 | if (regno > FS*4) |
116 | regno -= 1*4; | 116 | regno -= 1*4; |
117 | put_stack_long(child, regno, value); | 117 | put_stack_long(child, regno, value); |
118 | return 0; | 118 | return 0; |
@@ -124,18 +124,18 @@ static unsigned long getreg(struct task_struct *child, | |||
124 | unsigned long retval = ~0UL; | 124 | unsigned long retval = ~0UL; |
125 | 125 | ||
126 | switch (regno >> 2) { | 126 | switch (regno >> 2) { |
127 | case FS: | 127 | case GS: |
128 | retval = child->thread.fs; | 128 | retval = child->thread.gs; |
129 | break; | 129 | break; |
130 | case DS: | 130 | case DS: |
131 | case ES: | 131 | case ES: |
132 | case GS: | 132 | case FS: |
133 | case SS: | 133 | case SS: |
134 | case CS: | 134 | case CS: |
135 | retval = 0xffff; | 135 | retval = 0xffff; |
136 | /* fall through */ | 136 | /* fall through */ |
137 | default: | 137 | default: |
138 | if (regno > ES*4) | 138 | if (regno > FS*4) |
139 | regno -= 1*4; | 139 | regno -= 1*4; |
140 | retval &= get_stack_long(child, regno); | 140 | retval &= get_stack_long(child, regno); |
141 | } | 141 | } |
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 4694ac980cd2..122623dcc6e1 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/initrd.h> | 33 | #include <linux/initrd.h> |
34 | #include <linux/bootmem.h> | 34 | #include <linux/bootmem.h> |
35 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
36 | #include <linux/platform_device.h> | ||
37 | #include <linux/console.h> | 36 | #include <linux/console.h> |
38 | #include <linux/mca.h> | 37 | #include <linux/mca.h> |
39 | #include <linux/root_dev.h> | 38 | #include <linux/root_dev.h> |
@@ -60,6 +59,7 @@ | |||
60 | #include <asm/io_apic.h> | 59 | #include <asm/io_apic.h> |
61 | #include <asm/ist.h> | 60 | #include <asm/ist.h> |
62 | #include <asm/io.h> | 61 | #include <asm/io.h> |
62 | #include <asm/vmi.h> | ||
63 | #include <setup_arch.h> | 63 | #include <setup_arch.h> |
64 | #include <bios_ebda.h> | 64 | #include <bios_ebda.h> |
65 | 65 | ||
@@ -581,6 +581,14 @@ void __init setup_arch(char **cmdline_p) | |||
581 | 581 | ||
582 | max_low_pfn = setup_memory(); | 582 | max_low_pfn = setup_memory(); |
583 | 583 | ||
584 | #ifdef CONFIG_VMI | ||
585 | /* | ||
586 | * Must be after max_low_pfn is determined, and before kernel | ||
587 | * pagetables are setup. | ||
588 | */ | ||
589 | vmi_init(); | ||
590 | #endif | ||
591 | |||
584 | /* | 592 | /* |
585 | * NOTE: before this point _nobody_ is allowed to allocate | 593 | * NOTE: before this point _nobody_ is allowed to allocate |
586 | * any memory using the bootmem allocator. Although the | 594 | * any memory using the bootmem allocator. Although the |
@@ -651,28 +659,3 @@ void __init setup_arch(char **cmdline_p) | |||
651 | #endif | 659 | #endif |
652 | tsc_init(); | 660 | tsc_init(); |
653 | } | 661 | } |
654 | |||
655 | static __init int add_pcspkr(void) | ||
656 | { | ||
657 | struct platform_device *pd; | ||
658 | int ret; | ||
659 | |||
660 | pd = platform_device_alloc("pcspkr", -1); | ||
661 | if (!pd) | ||
662 | return -ENOMEM; | ||
663 | |||
664 | ret = platform_device_add(pd); | ||
665 | if (ret) | ||
666 | platform_device_put(pd); | ||
667 | |||
668 | return ret; | ||
669 | } | ||
670 | device_initcall(add_pcspkr); | ||
671 | |||
672 | /* | ||
673 | * Local Variables: | ||
674 | * mode:c | ||
675 | * c-file-style:"k&r" | ||
676 | * c-basic-offset:8 | ||
677 | * End: | ||
678 | */ | ||
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 65d7620eaa09..4f99e870c986 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
22 | #include <linux/ptrace.h> | 22 | #include <linux/ptrace.h> |
23 | #include <linux/elf.h> | 23 | #include <linux/elf.h> |
24 | #include <linux/binfmts.h> | ||
24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
25 | #include <asm/ucontext.h> | 26 | #include <asm/ucontext.h> |
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
@@ -128,8 +129,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax | |||
128 | X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ | 129 | X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ |
129 | X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) | 130 | X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) |
130 | 131 | ||
131 | COPY_SEG(gs); | 132 | GET_SEG(gs); |
132 | GET_SEG(fs); | 133 | COPY_SEG(fs); |
133 | COPY_SEG(es); | 134 | COPY_SEG(es); |
134 | COPY_SEG(ds); | 135 | COPY_SEG(ds); |
135 | COPY(edi); | 136 | COPY(edi); |
@@ -244,9 +245,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | |||
244 | { | 245 | { |
245 | int tmp, err = 0; | 246 | int tmp, err = 0; |
246 | 247 | ||
247 | err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); | 248 | err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs); |
248 | savesegment(fs, tmp); | 249 | savesegment(gs, tmp); |
249 | err |= __put_user(tmp, (unsigned int __user *)&sc->fs); | 250 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); |
250 | 251 | ||
251 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); | 252 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); |
252 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); | 253 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); |
@@ -349,7 +350,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
349 | goto give_sigsegv; | 350 | goto give_sigsegv; |
350 | } | 351 | } |
351 | 352 | ||
352 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); | 353 | if (current->binfmt->hasvdso) |
354 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); | ||
355 | else | ||
356 | restorer = (void *)&frame->retcode; | ||
353 | if (ka->sa.sa_flags & SA_RESTORER) | 357 | if (ka->sa.sa_flags & SA_RESTORER) |
354 | restorer = ka->sa.sa_restorer; | 358 | restorer = ka->sa.sa_restorer; |
355 | 359 | ||
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 5285aff8367f..9bd9637ae692 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #include <asm/mtrr.h> | 24 | #include <asm/mtrr.h> |
25 | #include <asm/tlbflush.h> | 25 | #include <asm/tlbflush.h> |
26 | #include <asm/idle.h> | ||
26 | #include <mach_apic.h> | 27 | #include <mach_apic.h> |
27 | 28 | ||
28 | /* | 29 | /* |
@@ -374,8 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
374 | /* | 375 | /* |
375 | * i'm not happy about this global shared spinlock in the | 376 | * i'm not happy about this global shared spinlock in the |
376 | * MM hot path, but we'll see how contended it is. | 377 | * MM hot path, but we'll see how contended it is. |
377 | * Temporarily this turns IRQs off, so that lockups are | 378 | * AK: x86-64 has a faster method that could be ported. |
378 | * detected by the NMI watchdog. | ||
379 | */ | 379 | */ |
380 | spin_lock(&tlbstate_lock); | 380 | spin_lock(&tlbstate_lock); |
381 | 381 | ||
@@ -400,7 +400,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
400 | 400 | ||
401 | while (!cpus_empty(flush_cpumask)) | 401 | while (!cpus_empty(flush_cpumask)) |
402 | /* nothing. lockup detection does not belong here */ | 402 | /* nothing. lockup detection does not belong here */ |
403 | mb(); | 403 | cpu_relax(); |
404 | 404 | ||
405 | flush_mm = NULL; | 405 | flush_mm = NULL; |
406 | flush_va = 0; | 406 | flush_va = 0; |
@@ -624,6 +624,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) | |||
624 | /* | 624 | /* |
625 | * At this point the info structure may be out of scope unless wait==1 | 625 | * At this point the info structure may be out of scope unless wait==1 |
626 | */ | 626 | */ |
627 | exit_idle(); | ||
627 | irq_enter(); | 628 | irq_enter(); |
628 | (*func)(info); | 629 | (*func)(info); |
629 | irq_exit(); | 630 | irq_exit(); |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 8c6c8c52b95c..f46a4d095e6c 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #include <mach_apic.h> | 63 | #include <mach_apic.h> |
64 | #include <mach_wakecpu.h> | 64 | #include <mach_wakecpu.h> |
65 | #include <smpboot_hooks.h> | 65 | #include <smpboot_hooks.h> |
66 | #include <asm/vmi.h> | ||
66 | 67 | ||
67 | /* Set if we find a B stepping CPU */ | 68 | /* Set if we find a B stepping CPU */ |
68 | static int __devinitdata smp_b_stepping; | 69 | static int __devinitdata smp_b_stepping; |
@@ -545,12 +546,15 @@ static void __cpuinit start_secondary(void *unused) | |||
545 | * booting is too fragile that we want to limit the | 546 | * booting is too fragile that we want to limit the |
546 | * things done here to the most necessary things. | 547 | * things done here to the most necessary things. |
547 | */ | 548 | */ |
549 | #ifdef CONFIG_VMI | ||
550 | vmi_bringup(); | ||
551 | #endif | ||
548 | secondary_cpu_init(); | 552 | secondary_cpu_init(); |
549 | preempt_disable(); | 553 | preempt_disable(); |
550 | smp_callin(); | 554 | smp_callin(); |
551 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 555 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
552 | rep_nop(); | 556 | rep_nop(); |
553 | setup_secondary_APIC_clock(); | 557 | setup_secondary_clock(); |
554 | if (nmi_watchdog == NMI_IO_APIC) { | 558 | if (nmi_watchdog == NMI_IO_APIC) { |
555 | disable_8259A_irq(0); | 559 | disable_8259A_irq(0); |
556 | enable_NMI_through_LVT0(NULL); | 560 | enable_NMI_through_LVT0(NULL); |
@@ -619,7 +623,6 @@ extern struct { | |||
619 | unsigned short ss; | 623 | unsigned short ss; |
620 | } stack_start; | 624 | } stack_start; |
621 | extern struct i386_pda *start_pda; | 625 | extern struct i386_pda *start_pda; |
622 | extern struct Xgt_desc_struct cpu_gdt_descr; | ||
623 | 626 | ||
624 | #ifdef CONFIG_NUMA | 627 | #ifdef CONFIG_NUMA |
625 | 628 | ||
@@ -835,6 +838,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
835 | num_starts = 0; | 838 | num_starts = 0; |
836 | 839 | ||
837 | /* | 840 | /* |
841 | * Paravirt / VMI wants a startup IPI hook here to set up the | ||
842 | * target processor state. | ||
843 | */ | ||
844 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, | ||
845 | (unsigned long) stack_start.esp); | ||
846 | |||
847 | /* | ||
838 | * Run STARTUP IPI loop. | 848 | * Run STARTUP IPI loop. |
839 | */ | 849 | */ |
840 | Dprintk("#startup loops: %d.\n", num_starts); | 850 | Dprintk("#startup loops: %d.\n", num_starts); |
@@ -1320,7 +1330,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1320 | 1330 | ||
1321 | smpboot_setup_io_apic(); | 1331 | smpboot_setup_io_apic(); |
1322 | 1332 | ||
1323 | setup_boot_APIC_clock(); | 1333 | setup_boot_clock(); |
1324 | 1334 | ||
1325 | /* | 1335 | /* |
1326 | * Synchronize the TSC with the AP | 1336 | * Synchronize the TSC with the AP |
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index bc882a2b1db6..13ca54a85a1c 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -78,7 +78,7 @@ int __init sysenter_setup(void) | |||
78 | syscall_pages[0] = virt_to_page(syscall_page); | 78 | syscall_pages[0] = virt_to_page(syscall_page); |
79 | 79 | ||
80 | #ifdef CONFIG_COMPAT_VDSO | 80 | #ifdef CONFIG_COMPAT_VDSO |
81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); | 81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); |
82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | 82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
83 | #endif | 83 | #endif |
84 | 84 | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index c505b16c0990..a4f67a6e6821 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -131,15 +131,13 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
131 | unsigned long pc = instruction_pointer(regs); | 131 | unsigned long pc = instruction_pointer(regs); |
132 | 132 | ||
133 | #ifdef CONFIG_SMP | 133 | #ifdef CONFIG_SMP |
134 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | 134 | if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) && |
135 | in_lock_functions(pc)) { | ||
135 | #ifdef CONFIG_FRAME_POINTER | 136 | #ifdef CONFIG_FRAME_POINTER |
136 | return *(unsigned long *)(regs->ebp + 4); | 137 | return *(unsigned long *)(regs->ebp + 4); |
137 | #else | 138 | #else |
138 | unsigned long *sp; | 139 | unsigned long *sp = (unsigned long *)&regs->esp; |
139 | if ((regs->xcs & 3) == 0) | 140 | |
140 | sp = (unsigned long *)&regs->esp; | |
141 | else | ||
142 | sp = (unsigned long *)regs->esp; | ||
143 | /* Return address is either directly at stack pointer | 141 | /* Return address is either directly at stack pointer |
144 | or above a saved eflags. Eflags has bits 22-31 zero, | 142 | or above a saved eflags. Eflags has bits 22-31 zero, |
145 | kernel addresses don't. */ | 143 | kernel addresses don't. */ |
@@ -232,6 +230,7 @@ EXPORT_SYMBOL(get_cmos_time); | |||
232 | static void sync_cmos_clock(unsigned long dummy); | 230 | static void sync_cmos_clock(unsigned long dummy); |
233 | 231 | ||
234 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); | 232 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); |
233 | int no_sync_cmos_clock; | ||
235 | 234 | ||
236 | static void sync_cmos_clock(unsigned long dummy) | 235 | static void sync_cmos_clock(unsigned long dummy) |
237 | { | 236 | { |
@@ -275,7 +274,8 @@ static void sync_cmos_clock(unsigned long dummy) | |||
275 | 274 | ||
276 | void notify_arch_cmos_timer(void) | 275 | void notify_arch_cmos_timer(void) |
277 | { | 276 | { |
278 | mod_timer(&sync_cmos_timer, jiffies + 1); | 277 | if (!no_sync_cmos_clock) |
278 | mod_timer(&sync_cmos_timer, jiffies + 1); | ||
279 | } | 279 | } |
280 | 280 | ||
281 | static long clock_cmos_diff; | 281 | static long clock_cmos_diff; |
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 0efad8aeb41a..af0d3f70a817 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -94,6 +94,7 @@ asmlinkage void spurious_interrupt_bug(void); | |||
94 | asmlinkage void machine_check(void); | 94 | asmlinkage void machine_check(void); |
95 | 95 | ||
96 | int kstack_depth_to_print = 24; | 96 | int kstack_depth_to_print = 24; |
97 | static unsigned int code_bytes = 64; | ||
97 | ATOMIC_NOTIFIER_HEAD(i386die_chain); | 98 | ATOMIC_NOTIFIER_HEAD(i386die_chain); |
98 | 99 | ||
99 | int register_die_notifier(struct notifier_block *nb) | 100 | int register_die_notifier(struct notifier_block *nb) |
@@ -291,10 +292,11 @@ void show_registers(struct pt_regs *regs) | |||
291 | int i; | 292 | int i; |
292 | int in_kernel = 1; | 293 | int in_kernel = 1; |
293 | unsigned long esp; | 294 | unsigned long esp; |
294 | unsigned short ss; | 295 | unsigned short ss, gs; |
295 | 296 | ||
296 | esp = (unsigned long) (&regs->esp); | 297 | esp = (unsigned long) (&regs->esp); |
297 | savesegment(ss, ss); | 298 | savesegment(ss, ss); |
299 | savesegment(gs, gs); | ||
298 | if (user_mode_vm(regs)) { | 300 | if (user_mode_vm(regs)) { |
299 | in_kernel = 0; | 301 | in_kernel = 0; |
300 | esp = regs->esp; | 302 | esp = regs->esp; |
@@ -313,8 +315,8 @@ void show_registers(struct pt_regs *regs) | |||
313 | regs->eax, regs->ebx, regs->ecx, regs->edx); | 315 | regs->eax, regs->ebx, regs->ecx, regs->edx); |
314 | printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", | 316 | printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", |
315 | regs->esi, regs->edi, regs->ebp, esp); | 317 | regs->esi, regs->edi, regs->ebp, esp); |
316 | printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", | 318 | printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", |
317 | regs->xds & 0xffff, regs->xes & 0xffff, ss); | 319 | regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); |
318 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", | 320 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", |
319 | TASK_COMM_LEN, current->comm, current->pid, | 321 | TASK_COMM_LEN, current->comm, current->pid, |
320 | current_thread_info(), current, current->thread_info); | 322 | current_thread_info(), current, current->thread_info); |
@@ -324,7 +326,8 @@ void show_registers(struct pt_regs *regs) | |||
324 | */ | 326 | */ |
325 | if (in_kernel) { | 327 | if (in_kernel) { |
326 | u8 *eip; | 328 | u8 *eip; |
327 | int code_bytes = 64; | 329 | unsigned int code_prologue = code_bytes * 43 / 64; |
330 | unsigned int code_len = code_bytes; | ||
328 | unsigned char c; | 331 | unsigned char c; |
329 | 332 | ||
330 | printk("\n" KERN_EMERG "Stack: "); | 333 | printk("\n" KERN_EMERG "Stack: "); |
@@ -332,14 +335,14 @@ void show_registers(struct pt_regs *regs) | |||
332 | 335 | ||
333 | printk(KERN_EMERG "Code: "); | 336 | printk(KERN_EMERG "Code: "); |
334 | 337 | ||
335 | eip = (u8 *)regs->eip - 43; | 338 | eip = (u8 *)regs->eip - code_prologue; |
336 | if (eip < (u8 *)PAGE_OFFSET || | 339 | if (eip < (u8 *)PAGE_OFFSET || |
337 | probe_kernel_address(eip, c)) { | 340 | probe_kernel_address(eip, c)) { |
338 | /* try starting at EIP */ | 341 | /* try starting at EIP */ |
339 | eip = (u8 *)regs->eip; | 342 | eip = (u8 *)regs->eip; |
340 | code_bytes = 32; | 343 | code_len = code_len - code_prologue + 1; |
341 | } | 344 | } |
342 | for (i = 0; i < code_bytes; i++, eip++) { | 345 | for (i = 0; i < code_len; i++, eip++) { |
343 | if (eip < (u8 *)PAGE_OFFSET || | 346 | if (eip < (u8 *)PAGE_OFFSET || |
344 | probe_kernel_address(eip, c)) { | 347 | probe_kernel_address(eip, c)) { |
345 | printk(" Bad EIP value."); | 348 | printk(" Bad EIP value."); |
@@ -1191,3 +1194,13 @@ static int __init kstack_setup(char *s) | |||
1191 | return 1; | 1194 | return 1; |
1192 | } | 1195 | } |
1193 | __setup("kstack=", kstack_setup); | 1196 | __setup("kstack=", kstack_setup); |
1197 | |||
1198 | static int __init code_bytes_setup(char *s) | ||
1199 | { | ||
1200 | code_bytes = simple_strtoul(s, NULL, 0); | ||
1201 | if (code_bytes > 8192) | ||
1202 | code_bytes = 8192; | ||
1203 | |||
1204 | return 1; | ||
1205 | } | ||
1206 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 2cfc7b09b925..46f752a8bbf3 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -23,6 +23,7 @@ | |||
23 | * an extra value to store the TSC freq | 23 | * an extra value to store the TSC freq |
24 | */ | 24 | */ |
25 | unsigned int tsc_khz; | 25 | unsigned int tsc_khz; |
26 | unsigned long long (*custom_sched_clock)(void); | ||
26 | 27 | ||
27 | int tsc_disable; | 28 | int tsc_disable; |
28 | 29 | ||
@@ -107,14 +108,14 @@ unsigned long long sched_clock(void) | |||
107 | { | 108 | { |
108 | unsigned long long this_offset; | 109 | unsigned long long this_offset; |
109 | 110 | ||
111 | if (unlikely(custom_sched_clock)) | ||
112 | return (*custom_sched_clock)(); | ||
113 | |||
110 | /* | 114 | /* |
111 | * in the NUMA case we dont use the TSC as they are not | 115 | * Fall back to jiffies if there's no TSC available: |
112 | * synchronized across all CPUs. | ||
113 | */ | 116 | */ |
114 | #ifndef CONFIG_NUMA | 117 | if (unlikely(tsc_disable)) |
115 | if (!cpu_khz || check_tsc_unstable()) | 118 | /* No locking but a rare wrong value is not a big deal: */ |
116 | #endif | ||
117 | /* no locking but a rare wrong value is not a big deal */ | ||
118 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 119 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
119 | 120 | ||
120 | /* read the Time Stamp Counter: */ | 121 | /* read the Time Stamp Counter: */ |
@@ -194,13 +195,13 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); | |||
194 | void __init tsc_init(void) | 195 | void __init tsc_init(void) |
195 | { | 196 | { |
196 | if (!cpu_has_tsc || tsc_disable) | 197 | if (!cpu_has_tsc || tsc_disable) |
197 | return; | 198 | goto out_no_tsc; |
198 | 199 | ||
199 | cpu_khz = calculate_cpu_khz(); | 200 | cpu_khz = calculate_cpu_khz(); |
200 | tsc_khz = cpu_khz; | 201 | tsc_khz = cpu_khz; |
201 | 202 | ||
202 | if (!cpu_khz) | 203 | if (!cpu_khz) |
203 | return; | 204 | goto out_no_tsc; |
204 | 205 | ||
205 | printk("Detected %lu.%03lu MHz processor.\n", | 206 | printk("Detected %lu.%03lu MHz processor.\n", |
206 | (unsigned long)cpu_khz / 1000, | 207 | (unsigned long)cpu_khz / 1000, |
@@ -208,6 +209,15 @@ void __init tsc_init(void) | |||
208 | 209 | ||
209 | set_cyc2ns_scale(cpu_khz); | 210 | set_cyc2ns_scale(cpu_khz); |
210 | use_tsc_delay(); | 211 | use_tsc_delay(); |
212 | return; | ||
213 | |||
214 | out_no_tsc: | ||
215 | /* | ||
216 | * Set the tsc_disable flag if there's no TSC support, this | ||
217 | * makes it a fast flag for the kernel to see whether it | ||
218 | * should be using the TSC. | ||
219 | */ | ||
220 | tsc_disable = 1; | ||
211 | } | 221 | } |
212 | 222 | ||
213 | #ifdef CONFIG_CPU_FREQ | 223 | #ifdef CONFIG_CPU_FREQ |
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index be2f96e67f78..d1b8f2b7aea6 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c | |||
@@ -96,12 +96,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user, | |||
96 | { | 96 | { |
97 | int ret = 0; | 97 | int ret = 0; |
98 | 98 | ||
99 | /* kernel_vm86_regs is missing xfs, so copy everything up to | 99 | /* kernel_vm86_regs is missing xgs, so copy everything up to |
100 | (but not including) xgs, and then rest after xgs. */ | 100 | (but not including) orig_eax, and then rest including orig_eax. */ |
101 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); | 101 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
102 | ret += copy_to_user(&user->__null_gs, &regs->pt.xgs, | 102 | ret += copy_to_user(&user->orig_eax, &regs->pt.orig_eax, |
103 | sizeof(struct kernel_vm86_regs) - | 103 | sizeof(struct kernel_vm86_regs) - |
104 | offsetof(struct kernel_vm86_regs, pt.xgs)); | 104 | offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
105 | 105 | ||
106 | return ret; | 106 | return ret; |
107 | } | 107 | } |
@@ -113,12 +113,13 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, | |||
113 | { | 113 | { |
114 | int ret = 0; | 114 | int ret = 0; |
115 | 115 | ||
116 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); | 116 | /* copy eax-xfs inclusive */ |
117 | ret += copy_from_user(&regs->pt.xgs, &user->__null_gs, | 117 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
118 | /* copy orig_eax-__gsh+extra */ | ||
119 | ret += copy_from_user(&regs->pt.orig_eax, &user->orig_eax, | |
118 | sizeof(struct kernel_vm86_regs) - | 120 | sizeof(struct kernel_vm86_regs) - |
119 | offsetof(struct kernel_vm86_regs, pt.xgs) + | 121 | offsetof(struct kernel_vm86_regs, pt.orig_eax) + |
120 | extra); | 122 | extra); |
121 | |||
122 | return ret; | 123 | return ret; |
123 | } | 124 | } |
124 | 125 | ||
@@ -157,8 +158,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) | |||
157 | 158 | ||
158 | ret = KVM86->regs32; | 159 | ret = KVM86->regs32; |
159 | 160 | ||
160 | loadsegment(fs, current->thread.saved_fs); | 161 | ret->xfs = current->thread.saved_fs; |
161 | ret->xgs = current->thread.saved_gs; | 162 | loadsegment(gs, current->thread.saved_gs); |
162 | 163 | ||
163 | return ret; | 164 | return ret; |
164 | } | 165 | } |
@@ -285,9 +286,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
285 | */ | 286 | */ |
286 | info->regs.pt.xds = 0; | 287 | info->regs.pt.xds = 0; |
287 | info->regs.pt.xes = 0; | 288 | info->regs.pt.xes = 0; |
288 | info->regs.pt.xgs = 0; | 289 | info->regs.pt.xfs = 0; |
289 | 290 | ||
290 | /* we are clearing fs later just before "jmp resume_userspace", | 291 | /* we are clearing gs later just before "jmp resume_userspace", |
291 | * because it is not saved/restored. | 292 | * because it is not saved/restored. |
292 | */ | 293 | */ |
293 | 294 | ||
@@ -321,8 +322,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
321 | */ | 322 | */ |
322 | info->regs32->eax = 0; | 323 | info->regs32->eax = 0; |
323 | tsk->thread.saved_esp0 = tsk->thread.esp0; | 324 | tsk->thread.saved_esp0 = tsk->thread.esp0; |
324 | savesegment(fs, tsk->thread.saved_fs); | 325 | tsk->thread.saved_fs = info->regs32->xfs; |
325 | tsk->thread.saved_gs = info->regs32->xgs; | 326 | savesegment(gs, tsk->thread.saved_gs); |
326 | 327 | ||
327 | tss = &per_cpu(init_tss, get_cpu()); | 328 | tss = &per_cpu(init_tss, get_cpu()); |
328 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; | 329 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; |
@@ -342,7 +343,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
342 | __asm__ __volatile__( | 343 | __asm__ __volatile__( |
343 | "movl %0,%%esp\n\t" | 344 | "movl %0,%%esp\n\t" |
344 | "movl %1,%%ebp\n\t" | 345 | "movl %1,%%ebp\n\t" |
345 | "mov %2, %%fs\n\t" | 346 | "mov %2, %%gs\n\t" |
346 | "jmp resume_userspace" | 347 | "jmp resume_userspace" |
347 | : /* no outputs */ | 348 | : /* no outputs */ |
348 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); | 349 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); |
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c new file mode 100644 index 000000000000..bb5a7abf949c --- /dev/null +++ b/arch/i386/kernel/vmi.c | |||
@@ -0,0 +1,949 @@ | |||
1 | /* | ||
2 | * VMI specific paravirt-ops implementation | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to zach@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/module.h> | ||
26 | #include <linux/license.h> | ||
27 | #include <linux/cpu.h> | ||
28 | #include <linux/bootmem.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <asm/vmi.h> | ||
31 | #include <asm/io.h> | ||
32 | #include <asm/fixmap.h> | ||
33 | #include <asm/apicdef.h> | ||
34 | #include <asm/apic.h> | ||
35 | #include <asm/processor.h> | ||
36 | #include <asm/timer.h> | ||
37 | #include <asm/vmi_time.h> | ||
38 | |||
39 | /* Convenient for calling VMI functions indirectly in the ROM */ | ||
40 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | ||
41 | typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); | ||
42 | |||
43 | #define call_vrom_func(rom,func) \ | ||
44 | (((VROMFUNC *)(rom->func))()) | ||
45 | |||
46 | #define call_vrom_long_func(rom,func,arg) \ | ||
47 | (((VROMLONGFUNC *)(rom->func)) (arg)) | ||
48 | |||
49 | static struct vrom_header *vmi_rom; | ||
50 | static int license_gplok; | ||
51 | static int disable_nodelay; | ||
52 | static int disable_pge; | ||
53 | static int disable_pse; | ||
54 | static int disable_sep; | ||
55 | static int disable_tsc; | ||
56 | static int disable_mtrr; | ||
57 | |||
58 | /* Cached VMI operations */ | ||
59 | struct { | ||
60 | void (*cpuid)(void /* non-c */); | ||
61 | void (*_set_ldt)(u32 selector); | ||
62 | void (*set_tr)(u32 selector); | ||
63 | void (*set_kernel_stack)(u32 selector, u32 esp0); | ||
64 | void (*allocate_page)(u32, u32, u32, u32, u32); | ||
65 | void (*release_page)(u32, u32); | ||
66 | void (*set_pte)(pte_t, pte_t *, unsigned); | ||
67 | void (*update_pte)(pte_t *, unsigned); | ||
68 | void (*set_linear_mapping)(int, u32, u32, u32); | ||
69 | void (*flush_tlb)(int); | ||
70 | void (*set_initial_ap_state)(int, int); | ||
71 | void (*halt)(void); | ||
72 | } vmi_ops; | ||
73 | |||
74 | /* XXX move this to alternative.h */ | ||
75 | extern struct paravirt_patch __start_parainstructions[], | ||
76 | __stop_parainstructions[]; | ||
77 | |||
78 | /* | ||
79 | * VMI patching routines. | ||
80 | */ | ||
81 | #define MNEM_CALL 0xe8 | ||
82 | #define MNEM_JMP 0xe9 | ||
83 | #define MNEM_RET 0xc3 | ||
84 | |||
85 | static char irq_save_disable_callout[] = { | ||
86 | MNEM_CALL, 0, 0, 0, 0, | ||
87 | MNEM_CALL, 0, 0, 0, 0, | ||
88 | MNEM_RET | ||
89 | }; | ||
90 | #define IRQ_PATCH_INT_MASK 0 | ||
91 | #define IRQ_PATCH_DISABLE 5 | ||
92 | |||
93 | static inline void patch_offset(unsigned char *eip, unsigned char *dest) | ||
94 | { | ||
95 | *(unsigned long *)(eip+1) = dest-eip-5; | ||
96 | } | ||
97 | |||
98 | static unsigned patch_internal(int call, unsigned len, void *insns) | ||
99 | { | ||
100 | u64 reloc; | ||
101 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | ||
102 | reloc = call_vrom_long_func(vmi_rom, get_reloc, call); | ||
103 | switch(rel->type) { | ||
104 | case VMI_RELOCATION_CALL_REL: | ||
105 | BUG_ON(len < 5); | ||
106 | *(char *)insns = MNEM_CALL; | ||
107 | patch_offset(insns, rel->eip); | ||
108 | return 5; | ||
109 | |||
110 | case VMI_RELOCATION_JUMP_REL: | ||
111 | BUG_ON(len < 5); | ||
112 | *(char *)insns = MNEM_JMP; | ||
113 | patch_offset(insns, rel->eip); | ||
114 | return 5; | ||
115 | |||
116 | case VMI_RELOCATION_NOP: | ||
117 | /* obliterate the whole thing */ | ||
118 | return 0; | ||
119 | |||
120 | case VMI_RELOCATION_NONE: | ||
121 | /* leave native code in place */ | ||
122 | break; | ||
123 | |||
124 | default: | ||
125 | BUG(); | ||
126 | } | ||
127 | return len; | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Apply patch if appropriate, return length of new instruction | ||
132 | * sequence. The callee does nop padding for us. | ||
133 | */ | ||
134 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) | ||
135 | { | ||
136 | switch (type) { | ||
137 | case PARAVIRT_IRQ_DISABLE: | ||
138 | return patch_internal(VMI_CALL_DisableInterrupts, len, insns); | ||
139 | case PARAVIRT_IRQ_ENABLE: | ||
140 | return patch_internal(VMI_CALL_EnableInterrupts, len, insns); | ||
141 | case PARAVIRT_RESTORE_FLAGS: | ||
142 | return patch_internal(VMI_CALL_SetInterruptMask, len, insns); | ||
143 | case PARAVIRT_SAVE_FLAGS: | ||
144 | return patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
145 | case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE: | ||
146 | if (len >= 10) { | ||
147 | patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
148 | patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5); | ||
149 | return 10; | ||
150 | } else { | ||
151 | /* | ||
152 | * You bastards didn't leave enough room to | ||
153 | * patch save_flags_irq_disable inline. Patch | ||
154 | * to a helper | ||
155 | */ | ||
156 | BUG_ON(len < 5); | ||
157 | *(char *)insns = MNEM_CALL; | ||
158 | patch_offset(insns, irq_save_disable_callout); | ||
159 | return 5; | ||
160 | } | ||
161 | case PARAVIRT_INTERRUPT_RETURN: | ||
162 | return patch_internal(VMI_CALL_IRET, len, insns); | ||
163 | case PARAVIRT_STI_SYSEXIT: | ||
164 | return patch_internal(VMI_CALL_SYSEXIT, len, insns); | ||
165 | default: | ||
166 | break; | ||
167 | } | ||
168 | return len; | ||
169 | } | ||
170 | |||
171 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | ||
172 | static void vmi_cpuid(unsigned int *eax, unsigned int *ebx, | ||
173 | unsigned int *ecx, unsigned int *edx) | ||
174 | { | ||
175 | int override = 0; | ||
176 | if (*eax == 1) | ||
177 | override = 1; | ||
178 | asm volatile ("call *%6" | ||
179 | : "=a" (*eax), | ||
180 | "=b" (*ebx), | ||
181 | "=c" (*ecx), | ||
182 | "=d" (*edx) | ||
183 | : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid)); | ||
184 | if (override) { | ||
185 | if (disable_pse) | ||
186 | *edx &= ~X86_FEATURE_PSE; | ||
187 | if (disable_pge) | ||
188 | *edx &= ~X86_FEATURE_PGE; | ||
189 | if (disable_sep) | ||
190 | *edx &= ~X86_FEATURE_SEP; | ||
191 | if (disable_tsc) | ||
192 | *edx &= ~X86_FEATURE_TSC; | ||
193 | if (disable_mtrr) | ||
194 | *edx &= ~X86_FEATURE_MTRR; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | ||
199 | { | ||
200 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | ||
201 | write_gdt_entry(gdt, nr, new->a, new->b); | ||
202 | } | ||
203 | |||
204 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | ||
205 | { | ||
206 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
207 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]); | ||
208 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]); | ||
209 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]); | ||
210 | } | ||
211 | |||
212 | static void vmi_set_ldt(const void *addr, unsigned entries) | ||
213 | { | ||
214 | unsigned cpu = smp_processor_id(); | ||
215 | u32 low, high; | ||
216 | |||
217 | pack_descriptor(&low, &high, (unsigned long)addr, | ||
218 | entries * sizeof(struct desc_struct) - 1, | ||
219 | DESCTYPE_LDT, 0); | ||
220 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high); | ||
221 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | ||
222 | } | ||
223 | |||
224 | static void vmi_set_tr(void) | ||
225 | { | ||
226 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | ||
227 | } | ||
228 | |||
229 | static void vmi_load_esp0(struct tss_struct *tss, | ||
230 | struct thread_struct *thread) | ||
231 | { | ||
232 | tss->esp0 = thread->esp0; | ||
233 | |||
234 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
235 | if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
236 | tss->ss1 = thread->sysenter_cs; | ||
237 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
238 | } | ||
239 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0); | ||
240 | } | ||
241 | |||
242 | static void vmi_flush_tlb_user(void) | ||
243 | { | ||
244 | vmi_ops.flush_tlb(VMI_FLUSH_TLB); | ||
245 | } | ||
246 | |||
247 | static void vmi_flush_tlb_kernel(void) | ||
248 | { | ||
249 | vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | ||
250 | } | ||
251 | |||
252 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | ||
253 | static void vmi_nop(void) | ||
254 | { | ||
255 | } | ||
256 | |||
257 | /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ | ||
258 | #ifdef CONFIG_NO_IDLE_HZ | ||
259 | static fastcall void vmi_safe_halt(void) | ||
260 | { | ||
261 | int idle = vmi_stop_hz_timer(); | ||
262 | vmi_ops.halt(); | ||
263 | if (idle) { | ||
264 | local_irq_disable(); | ||
265 | vmi_account_time_restart_hz_timer(); | ||
266 | local_irq_enable(); | ||
267 | } | ||
268 | } | ||
269 | #endif | ||
270 | |||
271 | #ifdef CONFIG_DEBUG_PAGE_TYPE | ||
272 | |||
273 | #ifdef CONFIG_X86_PAE | ||
274 | #define MAX_BOOT_PTS (2048+4+1) | ||
275 | #else | ||
276 | #define MAX_BOOT_PTS (1024+1) | ||
277 | #endif | ||
278 | |||
279 | /* | ||
280 | * During boot, mem_map is not yet available in paging_init, so stash | ||
281 | * all the boot page allocations here. | ||
282 | */ | ||
283 | static struct { | ||
284 | u32 pfn; | ||
285 | int type; | ||
286 | } boot_page_allocations[MAX_BOOT_PTS]; | ||
287 | static int num_boot_page_allocations; | ||
288 | static int boot_allocations_applied; | ||
289 | |||
290 | void vmi_apply_boot_page_allocations(void) | ||
291 | { | ||
292 | int i; | ||
293 | BUG_ON(!mem_map); | ||
294 | for (i = 0; i < num_boot_page_allocations; i++) { | ||
295 | struct page *page = pfn_to_page(boot_page_allocations[i].pfn); | ||
296 | page->type = boot_page_allocations[i].type; | ||
297 | page->type = boot_page_allocations[i].type & | ||
298 | ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
299 | } | ||
300 | boot_allocations_applied = 1; | ||
301 | } | ||
302 | |||
303 | static void record_page_type(u32 pfn, int type) | ||
304 | { | ||
305 | BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS); | ||
306 | boot_page_allocations[num_boot_page_allocations].pfn = pfn; | ||
307 | boot_page_allocations[num_boot_page_allocations].type = type; | ||
308 | num_boot_page_allocations++; | ||
309 | } | ||
310 | |||
311 | static void check_zeroed_page(u32 pfn, int type, struct page *page) | ||
312 | { | ||
313 | u32 *ptr; | ||
314 | int i; | ||
315 | int limit = PAGE_SIZE / sizeof(int); | ||
316 | |||
317 | if (page_address(page)) | ||
318 | ptr = (u32 *)page_address(page); | ||
319 | else | ||
320 | ptr = (u32 *)__va(pfn << PAGE_SHIFT); | ||
321 | /* | ||
322 | * When cloning the root in non-PAE mode, only the userspace | ||
323 | * pdes need to be zeroed. | ||
324 | */ | ||
325 | if (type & VMI_PAGE_CLONE) | ||
326 | limit = USER_PTRS_PER_PGD; | ||
327 | for (i = 0; i < limit; i++) | ||
328 | BUG_ON(ptr[i]); | ||
329 | } | ||
330 | |||
331 | /* | ||
332 | * We stash the page type into struct page so we can verify the page | ||
333 | * types are used properly. | ||
334 | */ | ||
335 | static void vmi_set_page_type(u32 pfn, int type) | ||
336 | { | ||
337 | /* PAE can have multiple roots per page - don't track */ | ||
338 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
339 | return; | ||
340 | |||
341 | if (boot_allocations_applied) { | ||
342 | struct page *page = pfn_to_page(pfn); | ||
343 | if (type != VMI_PAGE_NORMAL) | ||
344 | BUG_ON(page->type); | ||
345 | else | ||
346 | BUG_ON(page->type == VMI_PAGE_NORMAL); | ||
347 | page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
348 | if (type & VMI_PAGE_ZEROED) | ||
349 | check_zeroed_page(pfn, type, page); | ||
350 | } else { | ||
351 | record_page_type(pfn, type); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | static void vmi_check_page_type(u32 pfn, int type) | ||
356 | { | ||
357 | /* PAE can have multiple roots per page - skip checks */ | ||
358 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
359 | return; | ||
360 | |||
361 | type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
362 | if (boot_allocations_applied) { | ||
363 | struct page *page = pfn_to_page(pfn); | ||
364 | BUG_ON((page->type ^ type) & VMI_PAGE_PAE); | ||
365 | BUG_ON(type == VMI_PAGE_NORMAL && page->type); | ||
366 | BUG_ON((type & page->type) == 0); | ||
367 | } | ||
368 | } | ||
369 | #else | ||
370 | #define vmi_set_page_type(p,t) do { } while (0) | ||
371 | #define vmi_check_page_type(p,t) do { } while (0) | ||
372 | #endif | ||
373 | |||
374 | static void vmi_allocate_pt(u32 pfn) | ||
375 | { | ||
376 | vmi_set_page_type(pfn, VMI_PAGE_L1); | ||
377 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | ||
378 | } | ||
379 | |||
380 | static void vmi_allocate_pd(u32 pfn) | ||
381 | { | ||
382 | /* | ||
383 | * This call comes in very early, before mem_map is setup. | ||
384 | * It is called only for swapper_pg_dir, which already has | ||
385 | * data on it. | ||
386 | */ | ||
387 | vmi_set_page_type(pfn, VMI_PAGE_L2); | ||
388 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | ||
389 | } | ||
390 | |||
391 | static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) | ||
392 | { | ||
393 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | ||
394 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | ||
395 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | ||
396 | } | ||
397 | |||
398 | static void vmi_release_pt(u32 pfn) | ||
399 | { | ||
400 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | ||
401 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
402 | } | ||
403 | |||
404 | static void vmi_release_pd(u32 pfn) | ||
405 | { | ||
406 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
407 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * Helper macros for MMU update flags. We can defer updates until a flush | ||
412 | * or page invalidation only if the update is to the current address space | ||
413 | * (otherwise, there is no flush). We must check against init_mm, since | ||
414 | * this could be a kernel update, which usually passes init_mm, although | ||
415 | * sometimes this check can be skipped if we know the particular function | ||
416 | * is only called on user mode PTEs. We could change the kernel to pass | ||
417 | * current->active_mm here, but in particular, I was unsure if changing | ||
418 | * mm/highmem.c to do this would still be correct on other architectures. | ||
419 | */ | ||
420 | #define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \ | ||
421 | (!mustbeuser && (mm) == &init_mm)) | ||
422 | #define vmi_flags_addr(mm, addr, level, user) \ | ||
423 | ((level) | (is_current_as(mm, user) ? \ | ||
424 | (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
425 | #define vmi_flags_addr_defer(mm, addr, level, user) \ | ||
426 | ((level) | (is_current_as(mm, user) ? \ | ||
427 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
428 | |||
429 | static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep) | ||
430 | { | ||
431 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
432 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
433 | } | ||
434 | |||
435 | static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) | ||
436 | { | ||
437 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
438 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | ||
439 | } | ||
440 | |||
441 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | ||
442 | { | ||
443 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | ||
444 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD); | ||
445 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | ||
446 | } | ||
447 | |||
448 | static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | ||
449 | { | ||
450 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
451 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
452 | } | ||
453 | |||
454 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
455 | { | ||
456 | #ifdef CONFIG_X86_PAE | ||
457 | const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 }; | ||
458 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
459 | #else | ||
460 | const pte_t pte = { pmdval.pud.pgd.pgd }; | ||
461 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
462 | #endif | ||
463 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | ||
464 | } | ||
465 | |||
466 | #ifdef CONFIG_X86_PAE | ||
467 | |||
468 | static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | ||
469 | { | ||
470 | /* | ||
471 | * XXX This is called from set_pmd_pte, but at both PT | ||
472 | * and PD layers so the VMI_PAGE_PT flag is wrong. But | ||
473 | * it is only called for large page mapping changes, | ||
474 | * the Xen backend, doesn't support large pages, and the | ||
475 | * ESX backend doesn't depend on the flag. | ||
476 | */ | ||
477 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | ||
478 | vmi_ops.update_pte(ptep, VMI_PAGE_PT); | ||
479 | } | ||
480 | |||
481 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | ||
482 | { | ||
483 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
484 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); | ||
485 | } | ||
486 | |||
487 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) | ||
488 | { | ||
489 | /* Um, eww */ | ||
490 | const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 }; | ||
491 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
492 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | ||
493 | } | ||
494 | |||
495 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
496 | { | ||
497 | const pte_t pte = { 0 }; | ||
498 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
499 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
500 | } | ||
501 | |||
502 | void vmi_pmd_clear(pmd_t *pmd) | ||
503 | { | ||
504 | const pte_t pte = { 0 }; | ||
505 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
506 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | ||
507 | } | ||
508 | #endif | ||
509 | |||
510 | #ifdef CONFIG_SMP | ||
511 | struct vmi_ap_state ap; | ||
512 | extern void setup_pda(void); | ||
513 | |||
514 | static void __init /* XXX cpu hotplug */ | ||
515 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | ||
516 | unsigned long start_esp) | ||
517 | { | ||
518 | /* Default everything to zero. This is fine for most GPRs. */ | ||
519 | memset(&ap, 0, sizeof(struct vmi_ap_state)); | ||
520 | |||
521 | ap.gdtr_limit = GDT_SIZE - 1; | ||
522 | ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid); | ||
523 | |||
524 | ap.idtr_limit = IDT_ENTRIES * 8 - 1; | ||
525 | ap.idtr_base = (unsigned long) idt_table; | ||
526 | |||
527 | ap.ldtr = 0; | ||
528 | |||
529 | ap.cs = __KERNEL_CS; | ||
530 | ap.eip = (unsigned long) start_eip; | ||
531 | ap.ss = __KERNEL_DS; | ||
532 | ap.esp = (unsigned long) start_esp; | ||
533 | |||
534 | ap.ds = __USER_DS; | ||
535 | ap.es = __USER_DS; | ||
536 | ap.fs = __KERNEL_PDA; | ||
537 | ap.gs = 0; | ||
538 | |||
539 | ap.eflags = 0; | ||
540 | |||
541 | setup_pda(); | ||
542 | |||
543 | #ifdef CONFIG_X86_PAE | ||
544 | /* efer should match BSP efer. */ | ||
545 | if (cpu_has_nx) { | ||
546 | unsigned l, h; | ||
547 | rdmsr(MSR_EFER, l, h); | ||
548 | ap.efer = (unsigned long long) h << 32 | l; | ||
549 | } | ||
550 | #endif | ||
551 | |||
552 | ap.cr3 = __pa(swapper_pg_dir); | ||
553 | /* Protected mode, paging, AM, WP, NE, MP. */ | ||
554 | ap.cr0 = 0x80050023; | ||
555 | ap.cr4 = mmu_cr4_features; | ||
556 | vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid); | ||
557 | } | ||
558 | #endif | ||
559 | |||
560 | static inline int __init check_vmi_rom(struct vrom_header *rom) | ||
561 | { | ||
562 | struct pci_header *pci; | ||
563 | struct pnp_header *pnp; | ||
564 | const char *manufacturer = "UNKNOWN"; | ||
565 | const char *product = "UNKNOWN"; | ||
566 | const char *license = "unspecified"; | ||
567 | |||
568 | if (rom->rom_signature != 0xaa55) | ||
569 | return 0; | ||
570 | if (rom->vrom_signature != VMI_SIGNATURE) | ||
571 | return 0; | ||
572 | if (rom->api_version_maj != VMI_API_REV_MAJOR || | ||
573 | rom->api_version_min+1 < VMI_API_REV_MINOR+1) { | ||
574 | printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n", | ||
575 | rom->api_version_maj, | ||
576 | rom->api_version_min); | ||
577 | return 0; | ||
578 | } | ||
579 | |||
580 | /* | ||
581 | * Relying on the VMI_SIGNATURE field is not 100% safe, so check | ||
582 | * the PCI header and device type to make sure this is really a | ||
583 | * VMI device. | ||
584 | */ | ||
585 | if (!rom->pci_header_offs) { | ||
586 | printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n"); | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | pci = (struct pci_header *)((char *)rom+rom->pci_header_offs); | ||
591 | if (pci->vendorID != PCI_VENDOR_ID_VMWARE || | ||
592 | pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) { | ||
593 | /* Allow it to run... anyways, but warn */ | ||
594 | printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n"); | ||
595 | } | ||
596 | |||
597 | if (rom->pnp_header_offs) { | ||
598 | pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs); | ||
599 | if (pnp->manufacturer_offset) | ||
600 | manufacturer = (const char *)rom+pnp->manufacturer_offset; | ||
601 | if (pnp->product_offset) | ||
602 | product = (const char *)rom+pnp->product_offset; | ||
603 | } | ||
604 | |||
605 | if (rom->license_offs) | ||
606 | license = (char *)rom+rom->license_offs; | ||
607 | |||
608 | printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n", | ||
609 | manufacturer, product, | ||
610 | rom->api_version_maj, rom->api_version_min, | ||
611 | pci->rom_version_maj, pci->rom_version_min); | ||
612 | |||
613 | license_gplok = license_is_gpl_compatible(license); | ||
614 | if (!license_gplok) { | ||
615 | printk(KERN_WARNING "VMI: ROM license '%s' taints kernel... " | ||
616 | "inlining disabled\n", | ||
617 | license); | ||
618 | add_taint(TAINT_PROPRIETARY_MODULE); | ||
619 | } | ||
620 | return 1; | ||
621 | } | ||
622 | |||
623 | /* | ||
624 | * Probe for the VMI option ROM | ||
625 | */ | ||
626 | static inline int __init probe_vmi_rom(void) | ||
627 | { | ||
628 | unsigned long base; | ||
629 | |||
630 | /* VMI ROM is in option ROM area, check signature */ | ||
631 | for (base = 0xC0000; base < 0xE0000; base += 2048) { | ||
632 | struct vrom_header *romstart; | ||
633 | romstart = (struct vrom_header *)isa_bus_to_virt(base); | ||
634 | if (check_vmi_rom(romstart)) { | ||
635 | vmi_rom = romstart; | ||
636 | return 1; | ||
637 | } | ||
638 | } | ||
639 | return 0; | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * VMI setup common to all processors | ||
644 | */ | ||
645 | void vmi_bringup(void) | ||
646 | { | ||
647 | /* We must establish the lowmem mapping for MMU ops to work */ | ||
648 | if (vmi_rom) | ||
649 | vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); | ||
650 | } | ||
651 | |||
652 | /* | ||
653 | * Return a pointer to the VMI function or a NOP stub | ||
654 | */ | ||
655 | static void *vmi_get_function(int vmicall) | ||
656 | { | ||
657 | u64 reloc; | ||
658 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
659 | reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall); | ||
660 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); | ||
661 | if (rel->type == VMI_RELOCATION_CALL_REL) | ||
662 | return (void *)rel->eip; | ||
663 | else | ||
664 | return (void *)vmi_nop; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
669 | * For unimplemented operations, fall back to default. | ||
670 | */ | ||
671 | #define para_fill(opname, vmicall) \ | ||
672 | do { \ | ||
673 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
674 | VMI_CALL_##vmicall); \ | ||
675 | if (rel->type != VMI_RELOCATION_NONE) { \ | ||
676 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \ | ||
677 | paravirt_ops.opname = (void *)rel->eip; \ | ||
678 | } \ | ||
679 | } while (0) | ||
680 | |||
681 | /* | ||
682 | * Activate the VMI interface and switch into paravirtualized mode | ||
683 | */ | ||
684 | static inline int __init activate_vmi(void) | ||
685 | { | ||
686 | short kernel_cs; | ||
687 | u64 reloc; | ||
688 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
689 | |||
690 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | ||
691 | printk(KERN_ERR "VMI ROM failed to initialize!"); | ||
692 | return 0; | ||
693 | } | ||
694 | savesegment(cs, kernel_cs); | ||
695 | |||
696 | paravirt_ops.paravirt_enabled = 1; | ||
697 | paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; | ||
698 | |||
699 | paravirt_ops.patch = vmi_patch; | ||
700 | paravirt_ops.name = "vmi"; | ||
701 | |||
702 | /* | ||
703 | * Many of these operations are ABI compatible with VMI. | ||
704 | * This means we can fill in the paravirt-ops with direct | ||
705 | * pointers into the VMI ROM. If the calling convention for | ||
706 | * these operations changes, this code needs to be updated. | ||
707 | * | ||
708 | * Exceptions | ||
709 | * CPUID paravirt-op uses pointers, not the native ISA | ||
710 | * halt has no VMI equivalent; all VMI halts are "safe" | ||
711 | * no MSR support yet - just trap and emulate. VMI uses the | ||
712 | * same ABI as the native ISA, but Linux wants exceptions | ||
713 | * from bogus MSR read / write handled | ||
714 | * rdpmc is not yet used in Linux | ||
715 | */ | ||
716 | |||
717 | /* CPUID is special, so very special */ | ||
718 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID); | ||
719 | if (rel->type != VMI_RELOCATION_NONE) { | ||
720 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
721 | vmi_ops.cpuid = (void *)rel->eip; | ||
722 | paravirt_ops.cpuid = vmi_cpuid; | ||
723 | } | ||
724 | |||
725 | para_fill(clts, CLTS); | ||
726 | para_fill(get_debugreg, GetDR); | ||
727 | para_fill(set_debugreg, SetDR); | ||
728 | para_fill(read_cr0, GetCR0); | ||
729 | para_fill(read_cr2, GetCR2); | ||
730 | para_fill(read_cr3, GetCR3); | ||
731 | para_fill(read_cr4, GetCR4); | ||
732 | para_fill(write_cr0, SetCR0); | ||
733 | para_fill(write_cr2, SetCR2); | ||
734 | para_fill(write_cr3, SetCR3); | ||
735 | para_fill(write_cr4, SetCR4); | ||
736 | para_fill(save_fl, GetInterruptMask); | ||
737 | para_fill(restore_fl, SetInterruptMask); | ||
738 | para_fill(irq_disable, DisableInterrupts); | ||
739 | para_fill(irq_enable, EnableInterrupts); | ||
740 | /* irq_save_disable !!! sheer pain */ | ||
741 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], | ||
742 | (char *)paravirt_ops.save_fl); | ||
743 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE], | ||
744 | (char *)paravirt_ops.irq_disable); | ||
745 | #ifndef CONFIG_NO_IDLE_HZ | ||
746 | para_fill(safe_halt, Halt); | ||
747 | #else | ||
748 | vmi_ops.halt = vmi_get_function(VMI_CALL_Halt); | ||
749 | paravirt_ops.safe_halt = vmi_safe_halt; | ||
750 | #endif | ||
751 | para_fill(wbinvd, WBINVD); | ||
752 | /* paravirt_ops.read_msr = vmi_rdmsr */ | ||
753 | /* paravirt_ops.write_msr = vmi_wrmsr */ | ||
754 | para_fill(read_tsc, RDTSC); | ||
755 | /* paravirt_ops.rdpmc = vmi_rdpmc */ | ||
756 | |||
757 | /* TR interface doesn't pass TR value */ | ||
758 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR); | ||
759 | if (rel->type != VMI_RELOCATION_NONE) { | ||
760 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
761 | vmi_ops.set_tr = (void *)rel->eip; | ||
762 | paravirt_ops.load_tr_desc = vmi_set_tr; | ||
763 | } | ||
764 | |||
765 | /* LDT is special, too */ | ||
766 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT); | ||
767 | if (rel->type != VMI_RELOCATION_NONE) { | ||
768 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
769 | vmi_ops._set_ldt = (void *)rel->eip; | ||
770 | paravirt_ops.set_ldt = vmi_set_ldt; | ||
771 | } | ||
772 | |||
773 | para_fill(load_gdt, SetGDT); | ||
774 | para_fill(load_idt, SetIDT); | ||
775 | para_fill(store_gdt, GetGDT); | ||
776 | para_fill(store_idt, GetIDT); | ||
777 | para_fill(store_tr, GetTR); | ||
778 | paravirt_ops.load_tls = vmi_load_tls; | ||
779 | para_fill(write_ldt_entry, WriteLDTEntry); | ||
780 | para_fill(write_gdt_entry, WriteGDTEntry); | ||
781 | para_fill(write_idt_entry, WriteIDTEntry); | ||
782 | reloc = call_vrom_long_func(vmi_rom, get_reloc, | ||
783 | VMI_CALL_UpdateKernelStack); | ||
784 | if (rel->type != VMI_RELOCATION_NONE) { | ||
785 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
786 | vmi_ops.set_kernel_stack = (void *)rel->eip; | ||
787 | paravirt_ops.load_esp0 = vmi_load_esp0; | ||
788 | } | ||
789 | |||
790 | para_fill(set_iopl_mask, SetIOPLMask); | ||
791 | paravirt_ops.io_delay = (void *)vmi_nop; | ||
792 | if (!disable_nodelay) { | ||
793 | paravirt_ops.const_udelay = (void *)vmi_nop; | ||
794 | } | ||
795 | |||
796 | para_fill(set_lazy_mode, SetLazyMode); | ||
797 | |||
798 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB); | ||
799 | if (rel->type != VMI_RELOCATION_NONE) { | ||
800 | vmi_ops.flush_tlb = (void *)rel->eip; | ||
801 | paravirt_ops.flush_tlb_user = vmi_flush_tlb_user; | ||
802 | paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel; | ||
803 | } | ||
804 | para_fill(flush_tlb_single, InvalPage); | ||
805 | |||
806 | /* | ||
807 | * Until a standard flag format can be agreed on, we need to | ||
808 | * implement these as wrappers in Linux. Get the VMI ROM | ||
809 | * function pointers for the two backend calls. | ||
810 | */ | ||
811 | #ifdef CONFIG_X86_PAE | ||
812 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong); | ||
813 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong); | ||
814 | #else | ||
815 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); | ||
816 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); | ||
817 | #endif | ||
818 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
819 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | ||
820 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | ||
821 | |||
822 | paravirt_ops.alloc_pt = vmi_allocate_pt; | ||
823 | paravirt_ops.alloc_pd = vmi_allocate_pd; | ||
824 | paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; | ||
825 | paravirt_ops.release_pt = vmi_release_pt; | ||
826 | paravirt_ops.release_pd = vmi_release_pd; | ||
827 | paravirt_ops.set_pte = vmi_set_pte; | ||
828 | paravirt_ops.set_pte_at = vmi_set_pte_at; | ||
829 | paravirt_ops.set_pmd = vmi_set_pmd; | ||
830 | paravirt_ops.pte_update = vmi_update_pte; | ||
831 | paravirt_ops.pte_update_defer = vmi_update_pte_defer; | ||
832 | #ifdef CONFIG_X86_PAE | ||
833 | paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; | ||
834 | paravirt_ops.set_pte_present = vmi_set_pte_present; | ||
835 | paravirt_ops.set_pud = vmi_set_pud; | ||
836 | paravirt_ops.pte_clear = vmi_pte_clear; | ||
837 | paravirt_ops.pmd_clear = vmi_pmd_clear; | ||
838 | #endif | ||
839 | /* | ||
840 | * These MUST always be patched. Don't support indirect jumps | ||
841 | * through these operations, as the VMI interface may use either | ||
842 | * a jump or a call to get to these operations, depending on | ||
843 | * the backend. They are performance critical anyway, so requiring | ||
844 | * a patch is not a big problem. | ||
845 | */ | ||
846 | paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0; | ||
847 | paravirt_ops.iret = (void *)0xbadbab0; | ||
848 | |||
849 | #ifdef CONFIG_SMP | ||
850 | paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook; | ||
851 | vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState); | ||
852 | #endif | ||
853 | |||
854 | #ifdef CONFIG_X86_LOCAL_APIC | ||
855 | paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead); | ||
856 | paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite); | ||
857 | paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite); | ||
858 | #endif | ||
859 | |||
860 | /* | ||
861 | * Check for VMI timer functionality by probing for a cycle frequency method | ||
862 | */ | ||
863 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); | ||
864 | if (rel->type != VMI_RELOCATION_NONE) { | ||
865 | vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; | ||
866 | vmi_timer_ops.get_cycle_counter = | ||
867 | vmi_get_function(VMI_CALL_GetCycleCounter); | ||
868 | vmi_timer_ops.get_wallclock = | ||
869 | vmi_get_function(VMI_CALL_GetWallclockTime); | ||
870 | vmi_timer_ops.wallclock_updated = | ||
871 | vmi_get_function(VMI_CALL_WallclockUpdated); | ||
872 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); | ||
873 | vmi_timer_ops.cancel_alarm = | ||
874 | vmi_get_function(VMI_CALL_CancelAlarm); | ||
875 | paravirt_ops.time_init = vmi_time_init; | ||
876 | paravirt_ops.get_wallclock = vmi_get_wallclock; | ||
877 | paravirt_ops.set_wallclock = vmi_set_wallclock; | ||
878 | #ifdef CONFIG_X86_LOCAL_APIC | ||
879 | paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; | ||
880 | paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; | ||
881 | #endif | ||
882 | custom_sched_clock = vmi_sched_clock; | ||
883 | } | ||
884 | |||
885 | /* | ||
886 | * Alternative instruction rewriting doesn't happen soon enough | ||
887 | * to convert VMI_IRET to a call instead of a jump; so we have | ||
888 | * to do this before IRQs get reenabled. Fortunately, it is | ||
889 | * idempotent. | ||
890 | */ | ||
891 | apply_paravirt(__start_parainstructions, __stop_parainstructions); | ||
892 | |||
893 | vmi_bringup(); | ||
894 | |||
895 | return 1; | ||
896 | } | ||
897 | |||
898 | #undef para_fill | ||
899 | |||
900 | void __init vmi_init(void) | ||
901 | { | ||
902 | unsigned long flags; | ||
903 | |||
904 | if (!vmi_rom) | ||
905 | probe_vmi_rom(); | ||
906 | else | ||
907 | check_vmi_rom(vmi_rom); | ||
908 | |||
909 | /* In case probing for or validating the ROM failed, basil */ | ||
910 | if (!vmi_rom) | ||
911 | return; | ||
912 | |||
913 | reserve_top_address(-vmi_rom->virtual_top); | ||
914 | |||
915 | local_irq_save(flags); | ||
916 | activate_vmi(); | ||
917 | #ifdef CONFIG_SMP | ||
918 | no_timer_check = 1; | ||
919 | #endif | ||
920 | local_irq_restore(flags & X86_EFLAGS_IF); | ||
921 | } | ||
922 | |||
923 | static int __init parse_vmi(char *arg) | ||
924 | { | ||
925 | if (!arg) | ||
926 | return -EINVAL; | ||
927 | |||
928 | if (!strcmp(arg, "disable_nodelay")) | ||
929 | disable_nodelay = 1; | ||
930 | else if (!strcmp(arg, "disable_pge")) { | ||
931 | clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); | ||
932 | disable_pge = 1; | ||
933 | } else if (!strcmp(arg, "disable_pse")) { | ||
934 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | ||
935 | disable_pse = 1; | ||
936 | } else if (!strcmp(arg, "disable_sep")) { | ||
937 | clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); | ||
938 | disable_sep = 1; | ||
939 | } else if (!strcmp(arg, "disable_tsc")) { | ||
940 | clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | ||
941 | disable_tsc = 1; | ||
942 | } else if (!strcmp(arg, "disable_mtrr")) { | ||
943 | clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); | ||
944 | disable_mtrr = 1; | ||
945 | } | ||
946 | return 0; | ||
947 | } | ||
948 | |||
949 | early_param("vmi", parse_vmi); | ||
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c new file mode 100644 index 000000000000..2e2d8dbcbd68 --- /dev/null +++ b/arch/i386/kernel/vmitime.c | |||
@@ -0,0 +1,499 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. | ||
27 | * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. | ||
28 | * See comments there for proper credits. | ||
29 | */ | ||
30 | |||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/jiffies.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/kernel_stat.h> | ||
37 | #include <linux/rcupdate.h> | ||
38 | #include <linux/clocksource.h> | ||
39 | |||
40 | #include <asm/timer.h> | ||
41 | #include <asm/io.h> | ||
42 | #include <asm/apic.h> | ||
43 | #include <asm/div64.h> | ||
44 | #include <asm/timer.h> | ||
45 | #include <asm/desc.h> | ||
46 | |||
47 | #include <asm/vmi.h> | ||
48 | #include <asm/vmi_time.h> | ||
49 | |||
50 | #include <mach_timer.h> | ||
51 | #include <io_ports.h> | ||
52 | |||
53 | #ifdef CONFIG_X86_LOCAL_APIC | ||
54 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT | ||
55 | #else | ||
56 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 | ||
57 | #endif | ||
58 | |||
59 | /* Cached VMI operations */ | ||
60 | struct vmi_timer_ops vmi_timer_ops; | ||
61 | |||
62 | #ifdef CONFIG_NO_IDLE_HZ | ||
63 | |||
64 | /* /proc/sys/kernel/hz_timer state. */ | ||
65 | int sysctl_hz_timer; | ||
66 | |||
67 | /* Some stats */ | ||
68 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs); | ||
69 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies); | ||
70 | static DEFINE_PER_CPU(unsigned long, idle_start_jiffies); | ||
71 | |||
72 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
73 | |||
74 | /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */ | ||
75 | static int alarm_hz = CONFIG_VMI_ALARM_HZ; | ||
76 | |||
77 | /* Cache of the value get_cycle_frequency / HZ. */ | ||
78 | static signed long long cycles_per_jiffy; | ||
79 | |||
80 | /* Cache of the value get_cycle_frequency / alarm_hz. */ | ||
81 | static signed long long cycles_per_alarm; | ||
82 | |||
83 | /* The number of cycles accounted for by the 'jiffies'/'xtime' count. | ||
84 | * Protected by xtime_lock. */ | ||
85 | static unsigned long long real_cycles_accounted_system; | ||
86 | |||
87 | /* The number of cycles accounted for by update_process_times(), per cpu. */ | ||
88 | static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu); | ||
89 | |||
90 | /* The number of stolen cycles accounted, per cpu. */ | ||
91 | static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu); | ||
92 | |||
93 | /* Clock source. */ | ||
94 | static cycle_t read_real_cycles(void) | ||
95 | { | ||
96 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
97 | } | ||
98 | |||
99 | static cycle_t read_available_cycles(void) | ||
100 | { | ||
101 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
102 | } | ||
103 | |||
104 | #if 0 | ||
105 | static cycle_t read_stolen_cycles(void) | ||
106 | { | ||
107 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN); | ||
108 | } | ||
109 | #endif /* 0 */ | ||
110 | |||
111 | static struct clocksource clocksource_vmi = { | ||
112 | .name = "vmi-timer", | ||
113 | .rating = 450, | ||
114 | .read = read_real_cycles, | ||
115 | .mask = CLOCKSOURCE_MASK(64), | ||
116 | .mult = 0, /* to be set */ | ||
117 | .shift = 22, | ||
118 | .is_continuous = 1, | ||
119 | }; | ||
120 | |||
121 | |||
122 | /* Timer interrupt handler. */ | ||
123 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); | ||
124 | |||
125 | static struct irqaction vmi_timer_irq = { | ||
126 | vmi_timer_interrupt, | ||
127 | SA_INTERRUPT, | ||
128 | CPU_MASK_NONE, | ||
129 | "VMI-alarm", | ||
130 | NULL, | ||
131 | NULL | ||
132 | }; | ||
133 | |||
134 | /* Alarm rate */ | ||
135 | static int __init vmi_timer_alarm_rate_setup(char* str) | ||
136 | { | ||
137 | int alarm_rate; | ||
138 | if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { | ||
139 | alarm_hz = alarm_rate; | ||
140 | printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); | ||
141 | } | ||
142 | return 1; | ||
143 | } | ||
144 | __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); | ||
145 | |||
146 | |||
147 | /* Initialization */ | ||
148 | static void vmi_get_wallclock_ts(struct timespec *ts) | ||
149 | { | ||
150 | unsigned long long wallclock; | ||
151 | wallclock = vmi_timer_ops.get_wallclock(); // nsec units | ||
152 | ts->tv_nsec = do_div(wallclock, 1000000000); | ||
153 | ts->tv_sec = wallclock; | ||
154 | } | ||
155 | |||
156 | static void update_xtime_from_wallclock(void) | ||
157 | { | ||
158 | struct timespec ts; | ||
159 | vmi_get_wallclock_ts(&ts); | ||
160 | do_settimeofday(&ts); | ||
161 | } | ||
162 | |||
163 | unsigned long vmi_get_wallclock(void) | ||
164 | { | ||
165 | struct timespec ts; | ||
166 | vmi_get_wallclock_ts(&ts); | ||
167 | return ts.tv_sec; | ||
168 | } | ||
169 | |||
/*
 * Setting the wallclock is not implemented here: the requested time is
 * ignored and -1 is always returned.
 */
int vmi_set_wallclock(unsigned long now)
{
	return -1;
}
174 | |||
/*
 * Scheduler clock hook: returns the current VMI_CYCLES_AVAILABLE count.
 *
 * NOTE(review): the value is a raw cycle count with no unit conversion
 * applied here -- confirm that is what the consumer of
 * custom_sched_clock expects.
 */
unsigned long long vmi_sched_clock(void)
{
	unsigned long long avail_now = read_available_cycles();

	return avail_now;
}
179 | |||
180 | void __init vmi_time_init(void) | ||
181 | { | ||
182 | unsigned long long cycles_per_sec, cycles_per_msec; | ||
183 | unsigned long flags; | ||
184 | |||
185 | local_irq_save(flags); | ||
186 | setup_irq(0, &vmi_timer_irq); | ||
187 | #ifdef CONFIG_X86_LOCAL_APIC | ||
188 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); | ||
189 | #endif | ||
190 | |||
191 | no_sync_cmos_clock = 1; | ||
192 | |||
193 | vmi_get_wallclock_ts(&xtime); | ||
194 | set_normalized_timespec(&wall_to_monotonic, | ||
195 | -xtime.tv_sec, -xtime.tv_nsec); | ||
196 | |||
197 | real_cycles_accounted_system = read_real_cycles(); | ||
198 | update_xtime_from_wallclock(); | ||
199 | per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); | ||
200 | |||
201 | cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); | ||
202 | |||
203 | cycles_per_jiffy = cycles_per_sec; | ||
204 | (void)do_div(cycles_per_jiffy, HZ); | ||
205 | cycles_per_alarm = cycles_per_sec; | ||
206 | (void)do_div(cycles_per_alarm, alarm_hz); | ||
207 | cycles_per_msec = cycles_per_sec; | ||
208 | (void)do_div(cycles_per_msec, 1000); | ||
209 | cpu_khz = cycles_per_msec; | ||
210 | |||
211 | printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" | ||
212 | "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, | ||
213 | cycles_per_alarm); | ||
214 | |||
215 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
216 | clocksource_vmi.shift); | ||
217 | if (clocksource_register(&clocksource_vmi)) | ||
218 | printk(KERN_WARNING "Error registering VMITIME clocksource."); | ||
219 | |||
220 | /* Disable PIT. */ | ||
221 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
222 | |||
223 | /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu | ||
224 | * reduce the latency calling update_process_times. */ | ||
225 | vmi_timer_ops.set_alarm( | ||
226 | VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
227 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
228 | cycles_per_alarm); | ||
229 | |||
230 | local_irq_restore(flags); | ||
231 | } | ||
232 | |||
233 | #ifdef CONFIG_X86_LOCAL_APIC | ||
234 | |||
235 | void __init vmi_timer_setup_boot_alarm(void) | ||
236 | { | ||
237 | local_irq_disable(); | ||
238 | |||
239 | /* Route the interrupt to the correct vector. */ | ||
240 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
241 | |||
242 | /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */ | ||
243 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
244 | vmi_timer_ops.set_alarm( | ||
245 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
246 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
247 | cycles_per_alarm); | ||
248 | local_irq_enable(); | ||
249 | } | ||
250 | |||
251 | /* Initialize the time accounting variables for an AP on an SMP system. | ||
252 | * Also, set the local alarm for the AP. */ | ||
253 | void __init vmi_timer_setup_secondary_alarm(void) | ||
254 | { | ||
255 | int cpu = smp_processor_id(); | ||
256 | |||
257 | /* Route the interrupt to the correct vector. */ | ||
258 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
259 | |||
260 | per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles(); | ||
261 | |||
262 | vmi_timer_ops.set_alarm( | ||
263 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
264 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
265 | cycles_per_alarm); | ||
266 | } | ||
267 | |||
268 | #endif | ||
269 | |||
270 | /* Update system wide (real) time accounting (e.g. jiffies, xtime). */ | ||
271 | static void vmi_account_real_cycles(unsigned long long cur_real_cycles) | ||
272 | { | ||
273 | long long cycles_not_accounted; | ||
274 | |||
275 | write_seqlock(&xtime_lock); | ||
276 | |||
277 | cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; | ||
278 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
279 | /* systems wide jiffies and wallclock. */ | ||
280 | do_timer(1); | ||
281 | |||
282 | cycles_not_accounted -= cycles_per_jiffy; | ||
283 | real_cycles_accounted_system += cycles_per_jiffy; | ||
284 | } | ||
285 | |||
286 | if (vmi_timer_ops.wallclock_updated()) | ||
287 | update_xtime_from_wallclock(); | ||
288 | |||
289 | write_sequnlock(&xtime_lock); | ||
290 | } | ||
291 | |||
292 | /* Update per-cpu process times. */ | ||
293 | static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, | ||
294 | unsigned long long cur_process_times_cycles) | ||
295 | { | ||
296 | long long cycles_not_accounted; | ||
297 | cycles_not_accounted = cur_process_times_cycles - | ||
298 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
299 | |||
300 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
301 | /* Account time to the current process. This includes | ||
302 | * calling into the scheduler to decrement the timeslice | ||
303 | * and possibly reschedule.*/ | ||
304 | update_process_times(user_mode(regs)); | ||
305 | /* XXX handle /proc/profile multiplier. */ | ||
306 | profile_tick(CPU_PROFILING); | ||
307 | |||
308 | cycles_not_accounted -= cycles_per_jiffy; | ||
309 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
310 | } | ||
311 | } | ||
312 | |||
313 | #ifdef CONFIG_NO_IDLE_HZ | ||
314 | /* Update per-cpu idle times. Used when a no-hz halt is ended. */ | ||
315 | static void vmi_account_no_hz_idle_cycles(int cpu, | ||
316 | unsigned long long cur_process_times_cycles) | ||
317 | { | ||
318 | long long cycles_not_accounted; | ||
319 | unsigned long no_idle_hz_jiffies = 0; | ||
320 | |||
321 | cycles_not_accounted = cur_process_times_cycles - | ||
322 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
323 | |||
324 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
325 | no_idle_hz_jiffies++; | ||
326 | cycles_not_accounted -= cycles_per_jiffy; | ||
327 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
328 | } | ||
329 | /* Account time to the idle process. */ | ||
330 | account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); | ||
331 | } | ||
332 | #endif | ||
333 | |||
334 | /* Update per-cpu stolen time. */ | ||
335 | static void vmi_account_stolen_cycles(int cpu, | ||
336 | unsigned long long cur_real_cycles, | ||
337 | unsigned long long cur_avail_cycles) | ||
338 | { | ||
339 | long long stolen_cycles_not_accounted; | ||
340 | unsigned long stolen_jiffies = 0; | ||
341 | |||
342 | if (cur_real_cycles < cur_avail_cycles) | ||
343 | return; | ||
344 | |||
345 | stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - | ||
346 | per_cpu(stolen_cycles_accounted_cpu, cpu); | ||
347 | |||
348 | while (stolen_cycles_not_accounted >= cycles_per_jiffy) { | ||
349 | stolen_jiffies++; | ||
350 | stolen_cycles_not_accounted -= cycles_per_jiffy; | ||
351 | per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
352 | } | ||
353 | /* HACK: pass NULL to force time onto cpustat->steal. */ | ||
354 | account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); | ||
355 | } | ||
356 | |||
/*
 * Common body of the IRQ0 handler (UP, no local APIC) and of the
 * local-APIC LVTT handler (UP with local APIC, or SMP).
 *
 * Samples the real and available cycle counters once, then runs the
 * system-wide, per-cpu process and per-cpu stolen time accounting
 * against those samples.
 */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now = read_real_cycles();
	unsigned long long avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Process times for this cpu. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

	/* Time the hypervisor took away from this cpu. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}
372 | |||
373 | #ifdef CONFIG_NO_IDLE_HZ | ||
374 | |||
375 | /* Must be called only from idle loop, with interrupts disabled. */ | ||
376 | int vmi_stop_hz_timer(void) | ||
377 | { | ||
378 | /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */ | ||
379 | |||
380 | unsigned long seq, next; | ||
381 | unsigned long long real_cycles_expiry; | ||
382 | int cpu = smp_processor_id(); | ||
383 | int idle; | ||
384 | |||
385 | BUG_ON(!irqs_disabled()); | ||
386 | if (sysctl_hz_timer != 0) | ||
387 | return 0; | ||
388 | |||
389 | cpu_set(cpu, nohz_cpu_mask); | ||
390 | smp_mb(); | ||
391 | if (rcu_needs_cpu(cpu) || local_softirq_pending() || | ||
392 | (next = next_timer_interrupt(), time_before_eq(next, jiffies))) { | ||
393 | cpu_clear(cpu, nohz_cpu_mask); | ||
394 | next = jiffies; | ||
395 | idle = 0; | ||
396 | } else | ||
397 | idle = 1; | ||
398 | |||
399 | /* Convert jiffies to the real cycle counter. */ | ||
400 | do { | ||
401 | seq = read_seqbegin(&xtime_lock); | ||
402 | real_cycles_expiry = real_cycles_accounted_system + | ||
403 | (long)(next - jiffies) * cycles_per_jiffy; | ||
404 | } while (read_seqretry(&xtime_lock, seq)); | ||
405 | |||
406 | /* This cpu is going idle. Disable the periodic alarm. */ | ||
407 | if (idle) { | ||
408 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
409 | per_cpu(idle_start_jiffies, cpu) = jiffies; | ||
410 | } | ||
411 | |||
412 | /* Set the real time alarm to expire at the next event. */ | ||
413 | vmi_timer_ops.set_alarm( | ||
414 | VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, | ||
415 | real_cycles_expiry, 0); | ||
416 | |||
417 | return idle; | ||
418 | } | ||
419 | |||
420 | static void vmi_reenable_hz_timer(int cpu) | ||
421 | { | ||
422 | /* For /proc/vmi/info idle_hz stat. */ | ||
423 | per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu); | ||
424 | per_cpu(vmi_idle_no_hz_irqs, cpu)++; | ||
425 | |||
426 | /* Don't bother explicitly cancelling the one-shot alarm -- at | ||
427 | * worse we will receive a spurious timer interrupt. */ | ||
428 | vmi_timer_ops.set_alarm( | ||
429 | VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
430 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
431 | cycles_per_alarm); | ||
432 | /* Indicate this cpu is no longer nohz idle. */ | ||
433 | cpu_clear(cpu, nohz_cpu_mask); | ||
434 | } | ||
435 | |||
/*
 * Called from interrupt handlers when the (local) HZ timer is disabled.
 *
 * Accounts the time that passed while the periodic alarm was off --
 * system-wide real time, this cpu's idle time and its stolen time --
 * and then turns the periodic alarm back on.  Interrupts must be
 * disabled.
 */
void vmi_account_time_restart_hz_timer(void)
{
	int cpu = smp_processor_id();
	unsigned long long real_now, avail_now;

	BUG_ON(!irqs_disabled());

	/* Sample both counters once for all of the accounting below. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);
	/* Idle time for this cpu. */
	vmi_account_no_hz_idle_cycles(cpu, avail_now);
	/* Time the hypervisor took away from this cpu. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);

	/* Reenable the hz timer. */
	vmi_reenable_hz_timer(cpu);
}
455 | |||
456 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
457 | |||
458 | /* UP (and no local-APIC) VMI-timer alarm interrupt handler. | ||
459 | * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after | ||
460 | * APIC setup and setup_boot_vmi_alarm() is called. */ | ||
461 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
462 | { | ||
463 | vmi_local_timer_interrupt(smp_processor_id()); | ||
464 | return IRQ_HANDLED; | ||
465 | } | ||
466 | |||
467 | #ifdef CONFIG_X86_LOCAL_APIC | ||
468 | |||
469 | /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector. | ||
470 | * Also used in UP when CONFIG_X86_LOCAL_APIC. | ||
471 | * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */ | ||
472 | void smp_apic_vmi_timer_interrupt(struct pt_regs *regs) | ||
473 | { | ||
474 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
475 | int cpu = smp_processor_id(); | ||
476 | |||
477 | /* | ||
478 | * the NMI deadlock-detector uses this. | ||
479 | */ | ||
480 | per_cpu(irq_stat,cpu).apic_timer_irqs++; | ||
481 | |||
482 | /* | ||
483 | * NOTE! We'd better ACK the irq immediately, | ||
484 | * because timer handling can be slow. | ||
485 | */ | ||
486 | ack_APIC_irq(); | ||
487 | |||
488 | /* | ||
489 | * update_process_times() expects us to have done irq_enter(). | ||
490 | * Besides, if we don't timer interrupts ignore the global | ||
491 | * interrupt lock, which is the WrongThing (tm) to do. | ||
492 | */ | ||
493 | irq_enter(); | ||
494 | vmi_local_timer_interrupt(cpu); | ||
495 | irq_exit(); | ||
496 | set_irq_regs(old_regs); | ||
497 | } | ||
498 | |||
499 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 5038a73d554e..ca51610955df 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -37,9 +37,14 @@ SECTIONS | |||
37 | { | 37 | { |
38 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; | 38 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; |
39 | phys_startup_32 = startup_32 - LOAD_OFFSET; | 39 | phys_startup_32 = startup_32 - LOAD_OFFSET; |
40 | |||
41 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | ||
42 | _text = .; /* Text and read-only data */ | ||
43 | *(.text.head) | ||
44 | } :text = 0x9090 | ||
45 | |||
40 | /* read-only */ | 46 | /* read-only */ |
41 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | 47 | .text : AT(ADDR(.text) - LOAD_OFFSET) { |
42 | _text = .; /* Text and read-only data */ | ||
43 | *(.text) | 48 | *(.text) |
44 | SCHED_TEXT | 49 | SCHED_TEXT |
45 | LOCK_TEXT | 50 | LOCK_TEXT |