Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/oprofile/common.c             |    2
-rw-r--r--  arch/arm/common/locomo.c                 |   45
-rw-r--r--  arch/i386/Kconfig                        |   24
-rw-r--r--  arch/i386/boot/video.S                   |   19
-rw-r--r--  arch/i386/kernel/Makefile                |    4
-rw-r--r--  arch/i386/kernel/hpet.c                  |   67
-rw-r--r--  arch/i386/kernel/i8253.c                 |  118
-rw-r--r--  arch/i386/kernel/kprobes.c               |   95
-rw-r--r--  arch/i386/kernel/numaq.c                 |   10
-rw-r--r--  arch/i386/kernel/setup.c                 |    1
-rw-r--r--  arch/i386/kernel/time.c                  |  157
-rw-r--r--  arch/i386/kernel/timers/Makefile         |    9
-rw-r--r--  arch/i386/kernel/timers/common.c         |  172
-rw-r--r--  arch/i386/kernel/timers/timer.c          |   75
-rw-r--r--  arch/i386/kernel/timers/timer_cyclone.c  |  259
-rw-r--r--  arch/i386/kernel/timers/timer_hpet.c     |  217
-rw-r--r--  arch/i386/kernel/timers/timer_none.c     |   39
-rw-r--r--  arch/i386/kernel/timers/timer_pit.c      |  177
-rw-r--r--  arch/i386/kernel/timers/timer_pm.c       |  342
-rw-r--r--  arch/i386/kernel/timers/timer_tsc.c      |  617
-rw-r--r--  arch/i386/kernel/tsc.c                   |  478
-rw-r--r--  arch/i386/lib/delay.c                    |   65
-rw-r--r--  arch/i386/mm/fault.c                     |   38
-rw-r--r--  arch/i386/oprofile/nmi_int.c             |    4
-rw-r--r--  arch/i386/pci/pcbios.c                   |    6
-rw-r--r--  arch/ia64/mm/fault.c                     |   36
-rw-r--r--  arch/m68k/mm/memory.c                    |    6
-rw-r--r--  arch/m68k/sun3/sun3dvma.c                |    6
-rw-r--r--  arch/mips/oprofile/common.c              |    2
-rw-r--r--  arch/powerpc/kernel/time.c               |    2
-rw-r--r--  arch/powerpc/mm/fault.c                  |   36
-rw-r--r--  arch/powerpc/oprofile/common.c           |    2
-rw-r--r--  arch/sh/oprofile/op_model_sh7750.c       |    2
-rw-r--r--  arch/sparc64/mm/fault.c                  |   36
-rw-r--r--  arch/x86_64/boot/video.S                 |   19
-rw-r--r--  arch/x86_64/kernel/pmtimer.c             |    2
-rw-r--r--  arch/x86_64/kernel/setup.c               |    1
-rw-r--r--  arch/x86_64/mm/fault.c                   |   39
38 files changed, 1090 insertions(+), 2139 deletions(-)
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index ba788cfdc3c6..9fc0eeb4f0ab 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -112,7 +112,7 @@ op_axp_create_files(struct super_block * sb, struct dentry * root)
112 112
113 for (i = 0; i < model->num_counters; ++i) { 113 for (i = 0; i < model->num_counters; ++i) {
114 struct dentry *dir; 114 struct dentry *dir;
115 char buf[3]; 115 char buf[4];
116 116
117 snprintf(buf, sizeof buf, "%d", i); 117 snprintf(buf, sizeof buf, "%d", i);
118 dir = oprofilefs_mkdir(sb, root, buf); 118 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index a7dc1370695b..0dafba3a701d 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -629,21 +629,6 @@ static int locomo_resume(struct platform_device *dev)
629#endif 629#endif
630 630
631 631
632#define LCM_ALC_EN 0x8000
633
634void frontlight_set(struct locomo *lchip, int duty, int vr, int bpwf)
635{
636 unsigned long flags;
637
638 spin_lock_irqsave(&lchip->lock, flags);
639 locomo_writel(bpwf, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
640 udelay(100);
641 locomo_writel(duty, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
642 locomo_writel(bpwf | LCM_ALC_EN, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
643 spin_unlock_irqrestore(&lchip->lock, flags);
644}
645
646
647/** 632/**
648 * locomo_probe - probe for a single LoCoMo chip. 633 * locomo_probe - probe for a single LoCoMo chip.
649 * @phys_addr: physical address of device. 634 * @phys_addr: physical address of device.
@@ -698,14 +683,10 @@ __locomo_probe(struct device *me, struct resource *mem, int irq)
698 , lchip->base + LOCOMO_GPD); 683 , lchip->base + LOCOMO_GPD);
699 locomo_writel(0, lchip->base + LOCOMO_GIE); 684 locomo_writel(0, lchip->base + LOCOMO_GIE);
700 685
701 /* FrontLight */ 686 /* Frontlight */
702 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS); 687 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
703 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD); 688 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
704 689
705 /* Same constants can be used for collie and poodle
706 (depending on CONFIG options in original sharp code)? */
707 frontlight_set(lchip, 163, 0, 148);
708
709 /* Longtime timer */ 690 /* Longtime timer */
710 locomo_writel(0, lchip->base + LOCOMO_LTINT); 691 locomo_writel(0, lchip->base + LOCOMO_LTINT);
711 /* SPI */ 692 /* SPI */
@@ -1063,6 +1044,30 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int
1063} 1044}
1064 1045
1065/* 1046/*
1047 * Frontlight control
1048 */
1049
1050static struct locomo *locomo_chip_driver(struct locomo_dev *ldev);
1051
1052void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf)
1053{
1054 unsigned long flags;
1055 struct locomo *lchip = locomo_chip_driver(dev);
1056
1057 if (vr)
1058 locomo_gpio_write(dev, LOCOMO_GPIO_FL_VR, 1);
1059 else
1060 locomo_gpio_write(dev, LOCOMO_GPIO_FL_VR, 0);
1061
1062 spin_lock_irqsave(&lchip->lock, flags);
1063 locomo_writel(bpwf, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
1064 udelay(100);
1065 locomo_writel(duty, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
1066 locomo_writel(bpwf | LOCOMO_ALC_EN, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
1067 spin_unlock_irqrestore(&lchip->lock, flags);
1068}
1069
1070/*
1066 * LoCoMo "Register Access Bus." 1071 * LoCoMo "Register Access Bus."
1067 * 1072 *
1068 * We model this as a regular bus type, and hang devices directly 1073 * We model this as a regular bus type, and hang devices directly
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 1596101cfaf8..374fb50608a0 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86_32
14 486, 586, Pentiums, and various instruction-set-compatible chips by 14 486, 586, Pentiums, and various instruction-set-compatible chips by
15 AMD, Cyrix, and others. 15 AMD, Cyrix, and others.
16 16
17config GENERIC_TIME
18 bool
19 default y
20
17config SEMAPHORE_SLEEPERS 21config SEMAPHORE_SLEEPERS
18 bool 22 bool
19 default y 23 default y
@@ -1046,13 +1050,23 @@ config SCx200
1046 tristate "NatSemi SCx200 support" 1050 tristate "NatSemi SCx200 support"
1047 depends on !X86_VOYAGER 1051 depends on !X86_VOYAGER
1048 help 1052 help
1049 This provides basic support for the National Semiconductor SCx200 1053 This provides basic support for National Semiconductor's
1050 processor. Right now this is just a driver for the GPIO pins. 1054 (now AMD's) Geode processors. The driver probes for the
1055 PCI-IDs of several on-chip devices, so its a good dependency
1056 for other scx200_* drivers.
1051 1057
1052 If you don't know what to do here, say N. 1058 If compiled as a module, the driver is named scx200.
1053 1059
1054 This support is also available as a module. If compiled as a 1060config SCx200HR_TIMER
1055 module, it will be called scx200. 1061 tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
1062 depends on SCx200 && GENERIC_TIME
1063 default y
1064 help
1065 This driver provides a clocksource built upon the on-chip
1066 27MHz high-resolution timer. Its also a workaround for
1067 NSC Geode SC-1100's buggy TSC, which loses time when the
1068 processor goes idle (as is done by the scheduler). The
1069 other workaround is idle=poll boot option.
1056 1070
1057source "drivers/pcmcia/Kconfig" 1071source "drivers/pcmcia/Kconfig"
1058 1072
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index c9343c3a8082..8c2a6faeeae5 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1929,7 +1929,7 @@ skip10: movb %ah, %al
1929 ret 1929 ret
1930 1930
1931store_edid: 1931store_edid:
1932#ifdef CONFIG_FB_FIRMWARE_EDID 1932#ifdef CONFIG_FIRMWARE_EDID
1933 pushw %es # just save all registers 1933 pushw %es # just save all registers
1934 pushw %ax 1934 pushw %ax
1935 pushw %bx 1935 pushw %bx
@@ -1947,6 +1947,22 @@ store_edid:
1947 rep 1947 rep
1948 stosl 1948 stosl
1949 1949
1950 pushw %es # save ES
1951 xorw %di, %di # Report Capability
1952 pushw %di
1953 popw %es # ES:DI must be 0:0
1954 movw $0x4f15, %ax
1955 xorw %bx, %bx
1956 xorw %cx, %cx
1957 int $0x10
1958 popw %es # restore ES
1959
1960 cmpb $0x00, %ah # call successful
1961 jne no_edid
1962
1963 cmpb $0x4f, %al # function supported
1964 jne no_edid
1965
1950 movw $0x4f15, %ax # do VBE/DDC 1966 movw $0x4f15, %ax # do VBE/DDC
1951 movw $0x01, %bx 1967 movw $0x01, %bx
1952 movw $0x00, %cx 1968 movw $0x00, %cx
@@ -1954,6 +1970,7 @@ store_edid:
1954 movw $0x140, %di 1970 movw $0x140, %di
1955 int $0x10 1971 int $0x10
1956 1972
1973no_edid:
1957 popw %di # restore all registers 1974 popw %di # restore all registers
1958 popw %dx 1975 popw %dx
1959 popw %cx 1976 popw %cx
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a020af2..0fac85df64f1 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds
7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ 7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
9 pci-dma.o i386_ksyms.o i387.o bootflag.o \ 9 pci-dma.o i386_ksyms.o i387.o bootflag.o \
10 quirks.o i8237.o topology.o alternative.o 10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
11 11
12obj-y += cpu/ 12obj-y += cpu/
13obj-y += timers/
14obj-y += acpi/ 13obj-y += acpi/
15obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 14obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
16obj-$(CONFIG_MCA) += mca.o 15obj-$(CONFIG_MCA) += mca.o
@@ -37,6 +36,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
37obj-$(CONFIG_DOUBLEFAULT) += doublefault.o 36obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
38obj-$(CONFIG_VM86) += vm86.o 37obj-$(CONFIG_VM86) += vm86.o
39obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 38obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
39obj-$(CONFIG_HPET_TIMER) += hpet.o
40 40
41EXTRA_AFLAGS := -traditional 41EXTRA_AFLAGS := -traditional
42 42
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
new file mode 100644
index 000000000000..c6737c35815d
--- /dev/null
+++ b/arch/i386/kernel/hpet.c
@@ -0,0 +1,67 @@
1#include <linux/clocksource.h>
2#include <linux/errno.h>
3#include <linux/hpet.h>
4#include <linux/init.h>
5
6#include <asm/hpet.h>
7#include <asm/io.h>
8
9#define HPET_MASK CLOCKSOURCE_MASK(32)
10#define HPET_SHIFT 22
11
12/* FSEC = 10^-15 NSEC = 10^-9 */
13#define FSEC_PER_NSEC 1000000
14
15static void *hpet_ptr;
16
17static cycle_t read_hpet(void)
18{
19 return (cycle_t)readl(hpet_ptr);
20}
21
22static struct clocksource clocksource_hpet = {
23 .name = "hpet",
24 .rating = 250,
25 .read = read_hpet,
26 .mask = HPET_MASK,
27 .mult = 0, /* set below */
28 .shift = HPET_SHIFT,
29 .is_continuous = 1,
30};
31
32static int __init init_hpet_clocksource(void)
33{
34 unsigned long hpet_period;
35 void __iomem* hpet_base;
36 u64 tmp;
37
38 if (!hpet_address)
39 return -ENODEV;
40
41 /* calculate the hpet address: */
42 hpet_base =
43 (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
44 hpet_ptr = hpet_base + HPET_COUNTER;
45
46 /* calculate the frequency: */
47 hpet_period = readl(hpet_base + HPET_PERIOD);
48
49 /*
50 * hpet period is in femto seconds per cycle
51 * so we need to convert this to ns/cyc units
52 * aproximated by mult/2^shift
53 *
54 * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
55 * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
56 * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
57 * (fsec/cyc << shift)/1000000 = mult
58 * (hpet_period << shift)/FSEC_PER_NSEC = mult
59 */
60 tmp = (u64)hpet_period << HPET_SHIFT;
61 do_div(tmp, FSEC_PER_NSEC);
62 clocksource_hpet.mult = (u32)tmp;
63
64 return clocksource_register(&clocksource_hpet);
65}
66
67module_init(init_hpet_clocksource);
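The mult value computed in init_hpet_clocksource() above is a fixed-point ratio: nanoseconds per cycle scaled by 2^22. The following standalone C sketch repeats that arithmetic outside the kernel; the period value is an assumed example for a 14.31818 MHz HPET, real hardware reports its own HPET_PERIOD register contents.

/* Standalone sketch (not kernel code) of the mult/shift setup done by
 * init_hpet_clocksource() above, using an assumed example period. */
#include <stdint.h>
#include <stdio.h>

#define HPET_SHIFT    22
#define FSEC_PER_NSEC 1000000ULL

int main(void)
{
	uint64_t hpet_period = 69841279;    /* fs per cycle, ~14.31818 MHz (example) */
	unsigned int mult = (unsigned int)((hpet_period << HPET_SHIFT) / FSEC_PER_NSEC);

	/* Converting a cycle delta to nanoseconds is then a multiply and a
	 * shift, which is what the generic timekeeping code does with the
	 * registered .mult/.shift pair. */
	uint64_t cycles = 1000;
	uint64_t ns = (cycles * mult) >> HPET_SHIFT;

	printf("mult=%u, 1000 cycles ~= %llu ns\n", mult, (unsigned long long)ns);
	return 0;
}

With these example numbers mult comes out near 293 million, and (cycles * mult) >> 22 recovers roughly 69.8 ns per cycle.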
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
new file mode 100644
index 000000000000..477b24daff53
--- /dev/null
+++ b/arch/i386/kernel/i8253.c
@@ -0,0 +1,118 @@
1/*
2 * i8253.c 8253/PIT functions
3 *
4 */
5#include <linux/clocksource.h>
6#include <linux/spinlock.h>
7#include <linux/jiffies.h>
8#include <linux/sysdev.h>
9#include <linux/module.h>
10#include <linux/init.h>
11
12#include <asm/smp.h>
13#include <asm/delay.h>
14#include <asm/i8253.h>
15#include <asm/io.h>
16
17#include "io_ports.h"
18
19DEFINE_SPINLOCK(i8253_lock);
20EXPORT_SYMBOL(i8253_lock);
21
22void setup_pit_timer(void)
23{
24 unsigned long flags;
25
26 spin_lock_irqsave(&i8253_lock, flags);
27 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
28 udelay(10);
29 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
30 udelay(10);
31 outb(LATCH >> 8 , PIT_CH0); /* MSB */
32 spin_unlock_irqrestore(&i8253_lock, flags);
33}
34
35/*
36 * Since the PIT overflows every tick, its not very useful
37 * to just read by itself. So use jiffies to emulate a free
38 * running counter:
39 */
40static cycle_t pit_read(void)
41{
42 unsigned long flags;
43 int count;
44 u32 jifs;
45 static int old_count;
46 static u32 old_jifs;
47
48 spin_lock_irqsave(&i8253_lock, flags);
49 /*
50 * Although our caller may have the read side of xtime_lock,
51 * this is now a seqlock, and we are cheating in this routine
52 * by having side effects on state that we cannot undo if
53 * there is a collision on the seqlock and our caller has to
54 * retry. (Namely, old_jifs and old_count.) So we must treat
55 * jiffies as volatile despite the lock. We read jiffies
56 * before latching the timer count to guarantee that although
57 * the jiffies value might be older than the count (that is,
58 * the counter may underflow between the last point where
59 * jiffies was incremented and the point where we latch the
60 * count), it cannot be newer.
61 */
62 jifs = jiffies;
63 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
64 count = inb_p(PIT_CH0); /* read the latched count */
65 count |= inb_p(PIT_CH0) << 8;
66
67 /* VIA686a test code... reset the latch if count > max + 1 */
68 if (count > LATCH) {
69 outb_p(0x34, PIT_MODE);
70 outb_p(LATCH & 0xff, PIT_CH0);
71 outb(LATCH >> 8, PIT_CH0);
72 count = LATCH - 1;
73 }
74
75 /*
76 * It's possible for count to appear to go the wrong way for a
77 * couple of reasons:
78 *
79 * 1. The timer counter underflows, but we haven't handled the
80 * resulting interrupt and incremented jiffies yet.
81 * 2. Hardware problem with the timer, not giving us continuous time,
82 * the counter does small "jumps" upwards on some Pentium systems,
83 * (see c't 95/10 page 335 for Neptun bug.)
84 *
85 * Previous attempts to handle these cases intelligently were
86 * buggy, so we just do the simple thing now.
87 */
88 if (count > old_count && jifs == old_jifs) {
89 count = old_count;
90 }
91 old_count = count;
92 old_jifs = jifs;
93
94 spin_unlock_irqrestore(&i8253_lock, flags);
95
96 count = (LATCH - 1) - count;
97
98 return (cycle_t)(jifs * LATCH) + count;
99}
100
101static struct clocksource clocksource_pit = {
102 .name = "pit",
103 .rating = 110,
104 .read = pit_read,
105 .mask = CLOCKSOURCE_MASK(32),
106 .mult = 0,
107 .shift = 20,
108};
109
110static int __init init_pit_clocksource(void)
111{
112 if (num_possible_cpus() > 4) /* PIT does not scale! */
113 return 0;
114
115 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
116 return clocksource_register(&clocksource_pit);
117}
118module_init(init_pit_clocksource);
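The comment above pit_read() explains the key idea: the PIT down-counter wraps every tick, so it is only useful once anchored to the coarse tick counter (jiffies), with a guard for readings that appear to run backwards inside one tick. A minimal standalone sketch of that widening, with both "hardware" values simulated as stand-ins, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define LATCH 11932                 /* PIT reload value for HZ=100 (example) */

static unsigned long jiffies;       /* stand-in for the real tick counter */

static int read_hw_downcounter(void)
{
	return 11000;               /* stand-in for the latched PIT read */
}

static uint64_t pit_cycles(void)
{
	static int old_count;
	static unsigned long old_jifs;
	unsigned long jifs = jiffies;           /* sample the tick count first */
	int count = read_hw_downcounter();
	int elapsed;

	/* Within one tick the down-counter must not increase; if it does,
	 * the latch raced with a counter wrap, so reuse the last reading. */
	if (count > old_count && jifs == old_jifs)
		count = old_count;
	old_count = count;
	old_jifs = jifs;

	/* cycles elapsed inside the current tick ... */
	elapsed = (LATCH - 1) - count;
	/* ... on top of the whole ticks already accounted for */
	return (uint64_t)jifs * LATCH + elapsed;
}

int main(void)
{
	jiffies = 5;
	printf("cycles = %llu\n", (unsigned long long)pit_cycles());
	return 0;
}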
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 395a9a6dff88..727e419ad78a 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to)
57/* 57/*
58 * returns non-zero if opcodes can be boosted. 58 * returns non-zero if opcodes can be boosted.
59 */ 59 */
60static __always_inline int can_boost(kprobe_opcode_t opcode) 60static __always_inline int can_boost(kprobe_opcode_t *opcodes)
61{ 61{
62 switch (opcode & 0xf0 ) { 62#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
63 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
64 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
65 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
66 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
67 << (row % 32))
68 /*
69 * Undefined/reserved opcodes, conditional jump, Opcode Extension
70 * Groups, and some special opcodes can not be boost.
71 */
72 static const unsigned long twobyte_is_boostable[256 / 32] = {
73 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
74 /* ------------------------------- */
75 W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
76 W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
77 W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
78 W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
79 W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
80 W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
81 W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
82 W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
83 W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
84 W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
85 W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
86 W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
87 W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
88 W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
89 W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
90 W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
91 /* ------------------------------- */
92 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
93 };
94#undef W
95 kprobe_opcode_t opcode;
96 kprobe_opcode_t *orig_opcodes = opcodes;
97retry:
98 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
99 return 0;
100 opcode = *(opcodes++);
101
102 /* 2nd-byte opcode */
103 if (opcode == 0x0f) {
104 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
105 return 0;
106 return test_bit(*opcodes, twobyte_is_boostable);
107 }
108
109 switch (opcode & 0xf0) {
110 case 0x60:
111 if (0x63 < opcode && opcode < 0x67)
112 goto retry; /* prefixes */
113 /* can't boost Address-size override and bound */
114 return (opcode != 0x62 && opcode != 0x67);
63 case 0x70: 115 case 0x70:
64 return 0; /* can't boost conditional jump */ 116 return 0; /* can't boost conditional jump */
65 case 0x90:
66 /* can't boost call and pushf */
67 return opcode != 0x9a && opcode != 0x9c;
68 case 0xc0: 117 case 0xc0:
69 /* can't boost undefined opcodes and soft-interruptions */ 118 /* can't boost software-interruptions */
70 return (0xc1 < opcode && opcode < 0xc6) || 119 return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
71 (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
72 case 0xd0: 120 case 0xd0:
73 /* can boost AA* and XLAT */ 121 /* can boost AA* and XLAT */
74 return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); 122 return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
75 case 0xe0: 123 case 0xe0:
76 /* can boost in/out and (may be) jmps */ 124 /* can boost in/out and absolute jmps */
77 return (0xe3 < opcode && opcode != 0xe8); 125 return ((opcode & 0x04) || opcode == 0xea);
78 case 0xf0: 126 case 0xf0:
127 if ((opcode & 0x0c) == 0 && opcode != 0xf1)
128 goto retry; /* lock/rep(ne) prefix */
79 /* clear and set flags can be boost */ 129 /* clear and set flags can be boost */
80 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); 130 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
81 default: 131 default:
82 /* currently, can't boost 2 bytes opcodes */ 132 if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
83 return opcode != 0x0f; 133 goto retry; /* prefixes */
134 /* can't boost CS override and call */
135 return (opcode != 0x2e && opcode != 0x9a);
84 } 136 }
85} 137}
86 138
87
88/* 139/*
89 * returns non-zero if opcode modifies the interrupt flag. 140 * returns non-zero if opcode modifies the interrupt flag.
90 */ 141 */
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
109 160
110 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 161 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
111 p->opcode = *p->addr; 162 p->opcode = *p->addr;
112 if (can_boost(p->opcode)) { 163 if (can_boost(p->addr)) {
113 p->ainsn.boostable = 0; 164 p->ainsn.boostable = 0;
114 } else { 165 } else {
115 p->ainsn.boostable = -1; 166 p->ainsn.boostable = -1;
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
208 struct kprobe_ctlblk *kcb; 259 struct kprobe_ctlblk *kcb;
209#ifdef CONFIG_PREEMPT 260#ifdef CONFIG_PREEMPT
210 unsigned pre_preempt_count = preempt_count(); 261 unsigned pre_preempt_count = preempt_count();
211#endif /* CONFIG_PREEMPT */ 262#else
263 unsigned pre_preempt_count = 1;
264#endif
212 265
213 addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); 266 addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
214 267
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
285 /* handler has already set things up, so skip ss setup */ 338 /* handler has already set things up, so skip ss setup */
286 return 1; 339 return 1;
287 340
288 if (p->ainsn.boostable == 1 && 341ss_probe:
289#ifdef CONFIG_PREEMPT 342 if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){
290 !(pre_preempt_count) && /*
291 * This enables booster when the direct
292 * execution path aren't preempted.
293 */
294#endif /* CONFIG_PREEMPT */
295 !p->post_handler && !p->break_handler ) {
296 /* Boost up -- we can execute copied instructions directly */ 343 /* Boost up -- we can execute copied instructions directly */
297 reset_current_kprobe(); 344 reset_current_kprobe();
298 regs->eip = (unsigned long)p->ainsn.insn; 345 regs->eip = (unsigned long)p->ainsn.insn;
299 preempt_enable_no_resched(); 346 preempt_enable_no_resched();
300 return 1; 347 return 1;
301 } 348 }
302
303ss_probe:
304 prepare_singlestep(p, regs); 349 prepare_singlestep(p, regs);
305 kcb->kprobe_status = KPROBE_HIT_SS; 350 kcb->kprobe_status = KPROBE_HIT_SS;
306 return 1; 351 return 1;
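The W() row macros and bitmap lookup added to can_boost() above are a general technique: pack one flag bit per opcode into an array of longs, OR pairs of 16-entry rows into 32-bit words, and classify with a single bit test. A standalone sketch of the same construction (not part of the patch) with only the 0x90-0x9f row populated, purely for illustration:

#include <stdio.h>

#define W(row, b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)          \
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))  \
	 << ((row) % 32))

/* one word per 32 opcodes; only 0x90..0x9f marked here as an example */
static const unsigned long boostable[256 / 32] = {
	[0x90 / 32] = W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) |
	              W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
};

static int is_boostable(unsigned char opcode)
{
	return (boostable[opcode / 32] >> (opcode % 32)) & 1;
}

int main(void)
{
	printf("0x90: %d, 0x70: %d\n", is_boostable(0x90), is_boostable(0x70));
	return 0;
}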
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index 5f5b075f860a..0caf14652bad 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void)
79 return 1; 79 return 1;
80} 80}
81 81
82static int __init numaq_dsc_disable(void) 82static int __init numaq_tsc_disable(void)
83{ 83{
84 printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); 84 if (num_online_nodes() > 1) {
85 tsc_disable = 1; 85 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
86 tsc_disable = 1;
87 }
86 return 0; 88 return 0;
87} 89}
88core_initcall(numaq_dsc_disable); 90arch_initcall(numaq_tsc_disable);
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 6bef9273733e..4a65040cc624 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1575,6 +1575,7 @@ void __init setup_arch(char **cmdline_p)
1575 conswitchp = &dummy_con; 1575 conswitchp = &dummy_con;
1576#endif 1576#endif
1577#endif 1577#endif
1578 tsc_init();
1578} 1579}
1579 1580
1580static __init int add_pcspkr(void) 1581static __init int add_pcspkr(void)
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 9d3074759856..5f43d0410122 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies;
82DEFINE_SPINLOCK(rtc_lock); 82DEFINE_SPINLOCK(rtc_lock);
83EXPORT_SYMBOL(rtc_lock); 83EXPORT_SYMBOL(rtc_lock);
84 84
85#include <asm/i8253.h>
86
87DEFINE_SPINLOCK(i8253_lock);
88EXPORT_SYMBOL(i8253_lock);
89
90struct timer_opts *cur_timer __read_mostly = &timer_none;
91
92/* 85/*
93 * This is a special lock that is owned by the CPU and holds the index 86 * This is a special lock that is owned by the CPU and holds the index
94 * register we are working with. It is required for NMI access to the 87 * register we are working with. It is required for NMI access to the
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
118} 111}
119EXPORT_SYMBOL(rtc_cmos_write); 112EXPORT_SYMBOL(rtc_cmos_write);
120 113
121/*
122 * This version of gettimeofday has microsecond resolution
123 * and better than microsecond precision on fast x86 machines with TSC.
124 */
125void do_gettimeofday(struct timeval *tv)
126{
127 unsigned long seq;
128 unsigned long usec, sec;
129 unsigned long max_ntp_tick;
130
131 do {
132 unsigned long lost;
133
134 seq = read_seqbegin(&xtime_lock);
135
136 usec = cur_timer->get_offset();
137 lost = jiffies - wall_jiffies;
138
139 /*
140 * If time_adjust is negative then NTP is slowing the clock
141 * so make sure not to go into next possible interval.
142 * Better to lose some accuracy than have time go backwards..
143 */
144 if (unlikely(time_adjust < 0)) {
145 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
146 usec = min(usec, max_ntp_tick);
147
148 if (lost)
149 usec += lost * max_ntp_tick;
150 }
151 else if (unlikely(lost))
152 usec += lost * (USEC_PER_SEC / HZ);
153
154 sec = xtime.tv_sec;
155 usec += (xtime.tv_nsec / 1000);
156 } while (read_seqretry(&xtime_lock, seq));
157
158 while (usec >= 1000000) {
159 usec -= 1000000;
160 sec++;
161 }
162
163 tv->tv_sec = sec;
164 tv->tv_usec = usec;
165}
166
167EXPORT_SYMBOL(do_gettimeofday);
168
169int do_settimeofday(struct timespec *tv)
170{
171 time_t wtm_sec, sec = tv->tv_sec;
172 long wtm_nsec, nsec = tv->tv_nsec;
173
174 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
175 return -EINVAL;
176
177 write_seqlock_irq(&xtime_lock);
178 /*
179 * This is revolting. We need to set "xtime" correctly. However, the
180 * value in this location is the value at the most recent update of
181 * wall time. Discover what correction gettimeofday() would have
182 * made, and then undo it!
183 */
184 nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
185 nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
186
187 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
188 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
189
190 set_normalized_timespec(&xtime, sec, nsec);
191 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
192
193 ntp_clear();
194 write_sequnlock_irq(&xtime_lock);
195 clock_was_set();
196 return 0;
197}
198
199EXPORT_SYMBOL(do_settimeofday);
200
201static int set_rtc_mmss(unsigned long nowtime) 114static int set_rtc_mmss(unsigned long nowtime)
202{ 115{
203 int retval; 116 int retval;
204 117 unsigned long flags;
205 WARN_ON(irqs_disabled());
206 118
207 /* gets recalled with irq locally disabled */ 119 /* gets recalled with irq locally disabled */
208 spin_lock_irq(&rtc_lock); 120 /* XXX - does irqsave resolve this? -johnstul */
121 spin_lock_irqsave(&rtc_lock, flags);
209 if (efi_enabled) 122 if (efi_enabled)
210 retval = efi_set_rtc_mmss(nowtime); 123 retval = efi_set_rtc_mmss(nowtime);
211 else 124 else
212 retval = mach_set_rtc_mmss(nowtime); 125 retval = mach_set_rtc_mmss(nowtime);
213 spin_unlock_irq(&rtc_lock); 126 spin_unlock_irqrestore(&rtc_lock, flags);
214 127
215 return retval; 128 return retval;
216} 129}
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime)
218 131
219int timer_ack; 132int timer_ack;
220 133
221/* monotonic_clock(): returns # of nanoseconds passed since time_init()
222 * Note: This function is required to return accurate
223 * time even in the absence of multiple timer ticks.
224 */
225unsigned long long monotonic_clock(void)
226{
227 return cur_timer->monotonic_clock();
228}
229EXPORT_SYMBOL(monotonic_clock);
230
231#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) 134#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
232unsigned long profile_pc(struct pt_regs *regs) 135unsigned long profile_pc(struct pt_regs *regs)
233{ 136{
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc);
242#endif 145#endif
243 146
244/* 147/*
245 * timer_interrupt() needs to keep up the real-time clock, 148 * This is the same as the above, except we _also_ save the current
246 * as well as call the "do_timer()" routine every clocktick 149 * Time Stamp Counter value at the time of the timer interrupt, so that
150 * we later on can estimate the time of day more exactly.
247 */ 151 */
248static inline void do_timer_interrupt(int irq, struct pt_regs *regs) 152irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
249{ 153{
154 /*
155 * Here we are in the timer irq handler. We just have irqs locally
156 * disabled but we don't know if the timer_bh is running on the other
157 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
158 * the irq version of write_lock because as just said we have irq
159 * locally disabled. -arca
160 */
161 write_seqlock(&xtime_lock);
162
250#ifdef CONFIG_X86_IO_APIC 163#ifdef CONFIG_X86_IO_APIC
251 if (timer_ack) { 164 if (timer_ack) {
252 /* 165 /*
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
279 irq = inb_p( 0x61 ); /* read the current state */ 192 irq = inb_p( 0x61 ); /* read the current state */
280 outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ 193 outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
281 } 194 }
282}
283
284/*
285 * This is the same as the above, except we _also_ save the current
286 * Time Stamp Counter value at the time of the timer interrupt, so that
287 * we later on can estimate the time of day more exactly.
288 */
289irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
290{
291 /*
292 * Here we are in the timer irq handler. We just have irqs locally
293 * disabled but we don't know if the timer_bh is running on the other
294 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
295 * the irq version of write_lock because as just said we have irq
296 * locally disabled. -arca
297 */
298 write_seqlock(&xtime_lock);
299
300 cur_timer->mark_offset();
301
302 do_timer_interrupt(irq, regs);
303 195
304 write_sequnlock(&xtime_lock); 196 write_sequnlock(&xtime_lock);
305 197
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void)
380 272
381static long clock_cmos_diff, sleep_start; 273static long clock_cmos_diff, sleep_start;
382 274
383static struct timer_opts *last_timer;
384static int timer_suspend(struct sys_device *dev, pm_message_t state) 275static int timer_suspend(struct sys_device *dev, pm_message_t state)
385{ 276{
386 /* 277 /*
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
389 clock_cmos_diff = -get_cmos_time(); 280 clock_cmos_diff = -get_cmos_time();
390 clock_cmos_diff += get_seconds(); 281 clock_cmos_diff += get_seconds();
391 sleep_start = get_cmos_time(); 282 sleep_start = get_cmos_time();
392 last_timer = cur_timer;
393 cur_timer = &timer_none;
394 if (last_timer->suspend)
395 last_timer->suspend(state);
396 return 0; 283 return 0;
397} 284}
398 285
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev)
415 jiffies_64 += sleep_length; 302 jiffies_64 += sleep_length;
416 wall_jiffies += sleep_length; 303 wall_jiffies += sleep_length;
417 write_sequnlock_irqrestore(&xtime_lock, flags); 304 write_sequnlock_irqrestore(&xtime_lock, flags);
418 if (last_timer->resume)
419 last_timer->resume();
420 cur_timer = last_timer;
421 last_timer = NULL;
422 touch_softlockup_watchdog(); 305 touch_softlockup_watchdog();
423 return 0; 306 return 0;
424} 307}
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void)
460 printk("Using HPET for base-timer\n"); 343 printk("Using HPET for base-timer\n");
461 } 344 }
462 345
463 cur_timer = select_timer();
464 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
465
466 time_init_hook(); 346 time_init_hook();
467} 347}
468#endif 348#endif
@@ -484,8 +364,5 @@ void __init time_init(void)
484 set_normalized_timespec(&wall_to_monotonic, 364 set_normalized_timespec(&wall_to_monotonic,
485 -xtime.tv_sec, -xtime.tv_nsec); 365 -xtime.tv_sec, -xtime.tv_nsec);
486 366
487 cur_timer = select_timer();
488 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
489
490 time_init_hook(); 367 time_init_hook();
491} 368}
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
index 8fa12be658dd..000000000000
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1#
2# Makefile for x86 timers
3#
4
5obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
6
7obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
8obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
9obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
index 8163fe0cf1f0..000000000000
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/*
2 * Common functions used across the timers go here
3 */
4
5#include <linux/init.h>
6#include <linux/timex.h>
7#include <linux/errno.h>
8#include <linux/jiffies.h>
9#include <linux/module.h>
10
11#include <asm/io.h>
12#include <asm/timer.h>
13#include <asm/hpet.h>
14
15#include "mach_timer.h"
16
17/* ------ Calibrate the TSC -------
18 * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
19 * Too much 64-bit arithmetic here to do this cleanly in C, and for
20 * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
21 * output busy loop as low as possible. We avoid reading the CTC registers
22 * directly because of the awkward 8-bit access mechanism of the 82C54
23 * device.
24 */
25
26#define CALIBRATE_TIME (5 * 1000020/HZ)
27
28unsigned long calibrate_tsc(void)
29{
30 mach_prepare_counter();
31
32 {
33 unsigned long startlow, starthigh;
34 unsigned long endlow, endhigh;
35 unsigned long count;
36
37 rdtsc(startlow,starthigh);
38 mach_countup(&count);
39 rdtsc(endlow,endhigh);
40
41
42 /* Error: ECTCNEVERSET */
43 if (count <= 1)
44 goto bad_ctc;
45
46 /* 64-bit subtract - gcc just messes up with long longs */
47 __asm__("subl %2,%0\n\t"
48 "sbbl %3,%1"
49 :"=a" (endlow), "=d" (endhigh)
50 :"g" (startlow), "g" (starthigh),
51 "0" (endlow), "1" (endhigh));
52
53 /* Error: ECPUTOOFAST */
54 if (endhigh)
55 goto bad_ctc;
56
57 /* Error: ECPUTOOSLOW */
58 if (endlow <= CALIBRATE_TIME)
59 goto bad_ctc;
60
61 __asm__("divl %2"
62 :"=a" (endlow), "=d" (endhigh)
63 :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
64
65 return endlow;
66 }
67
68 /*
69 * The CTC wasn't reliable: we got a hit on the very first read,
70 * or the CPU was so fast/slow that the quotient wouldn't fit in
71 * 32 bits..
72 */
73bad_ctc:
74 return 0;
75}
76
77#ifdef CONFIG_HPET_TIMER
78/* ------ Calibrate the TSC using HPET -------
79 * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
80 * Second output is parameter 1 (when non NULL)
81 * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
82 * calibrate_tsc() calibrates the processor TSC by comparing
83 * it to the HPET timer of known frequency.
84 * Too much 64-bit arithmetic here to do this cleanly in C
85 */
86#define CALIBRATE_CNT_HPET (5 * hpet_tick)
87#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
88
89unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
90{
91 unsigned long tsc_startlow, tsc_starthigh;
92 unsigned long tsc_endlow, tsc_endhigh;
93 unsigned long hpet_start, hpet_end;
94 unsigned long result, remain;
95
96 hpet_start = hpet_readl(HPET_COUNTER);
97 rdtsc(tsc_startlow, tsc_starthigh);
98 do {
99 hpet_end = hpet_readl(HPET_COUNTER);
100 } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
101 rdtsc(tsc_endlow, tsc_endhigh);
102
103 /* 64-bit subtract - gcc just messes up with long longs */
104 __asm__("subl %2,%0\n\t"
105 "sbbl %3,%1"
106 :"=a" (tsc_endlow), "=d" (tsc_endhigh)
107 :"g" (tsc_startlow), "g" (tsc_starthigh),
108 "0" (tsc_endlow), "1" (tsc_endhigh));
109
110 /* Error: ECPUTOOFAST */
111 if (tsc_endhigh)
112 goto bad_calibration;
113
114 /* Error: ECPUTOOSLOW */
115 if (tsc_endlow <= CALIBRATE_TIME_HPET)
116 goto bad_calibration;
117
118 ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
119 if (remain > (tsc_endlow >> 1))
120 result++; /* rounding the result */
121
122 if (tsc_hpet_quotient_ptr) {
123 unsigned long tsc_hpet_quotient;
124
125 ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
126 CALIBRATE_CNT_HPET);
127 if (remain > (tsc_endlow >> 1))
128 tsc_hpet_quotient++; /* rounding the result */
129 *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
130 }
131
132 return result;
133bad_calibration:
134 /*
135 * the CPU was so fast/slow that the quotient wouldn't fit in
136 * 32 bits..
137 */
138 return 0;
139}
140#endif
141
142
143unsigned long read_timer_tsc(void)
144{
145 unsigned long retval;
146 rdtscl(retval);
147 return retval;
148}
149
150
151/* calculate cpu_khz */
152void init_cpu_khz(void)
153{
154 if (cpu_has_tsc) {
155 unsigned long tsc_quotient = calibrate_tsc();
156 if (tsc_quotient) {
157 /* report CPU clock rate in Hz.
158 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
159 * clock/second. Our precision is about 100 ppm.
160 */
161 { unsigned long eax=0, edx=1000;
162 __asm__("divl %2"
163 :"=a" (cpu_khz), "=d" (edx)
164 :"r" (tsc_quotient),
165 "0" (eax), "1" (edx));
166 printk("Detected %u.%03u MHz processor.\n",
167 cpu_khz / 1000, cpu_khz % 1000);
168 }
169 }
170 }
171}
172
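The deleted common.c did its calibration math with two 32-bit divides written as inline assembly; the arithmetic itself is plain fixed point. A standalone sketch (not part of the patch) redoing the same computation in 64-bit C, with an assumed TSC delta for a roughly 2 GHz CPU:

#include <stdint.h>
#include <stdio.h>

#define HZ             100
#define CALIBRATE_TIME (5 * 1000020 / HZ)   /* ~50000 usec, as in common.c */

int main(void)
{
	uint64_t tsc_delta = 100001000;     /* TSC cycles over CALIBRATE_TIME (example) */

	/* 2^32 / (TSC clocks per usec): the value calibrate_tsc() returned */
	unsigned int tsc_quotient =
		(unsigned int)(((uint64_t)CALIBRATE_TIME << 32) / tsc_delta);

	/* (1000 << 32) / quotient = TSC clocks per msec = cpu_khz */
	unsigned int cpu_khz =
		(unsigned int)(((uint64_t)1000 << 32) / tsc_quotient);

	printf("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
	return 0;
}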
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
index 7e39ed8e33f8..000000000000
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/string.h>
4#include <asm/timer.h>
5
6#ifdef CONFIG_HPET_TIMER
7/*
8 * HPET memory read is slower than tsc reads, but is more dependable as it
9 * always runs at constant frequency and reduces complexity due to
10 * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
11 * timer_pit when HPET is active. So, we default to timer_tsc.
12 */
13#endif
14/* list of timers, ordered by preference, NULL terminated */
15static struct init_timer_opts* __initdata timers[] = {
16#ifdef CONFIG_X86_CYCLONE_TIMER
17 &timer_cyclone_init,
18#endif
19#ifdef CONFIG_HPET_TIMER
20 &timer_hpet_init,
21#endif
22#ifdef CONFIG_X86_PM_TIMER
23 &timer_pmtmr_init,
24#endif
25 &timer_tsc_init,
26 &timer_pit_init,
27 NULL,
28};
29
30static char clock_override[10] __initdata;
31
32static int __init clock_setup(char* str)
33{
34 if (str)
35 strlcpy(clock_override, str, sizeof(clock_override));
36 return 1;
37}
38__setup("clock=", clock_setup);
39
40
41/* The chosen timesource has been found to be bad.
42 * Fall back to a known good timesource (the PIT)
43 */
44void clock_fallback(void)
45{
46 cur_timer = &timer_pit;
47}
48
49/* iterates through the list of timers, returning the first
50 * one that initializes successfully.
51 */
52struct timer_opts* __init select_timer(void)
53{
54 int i = 0;
55
56 /* find most preferred working timer */
57 while (timers[i]) {
58 if (timers[i]->init)
59 if (timers[i]->init(clock_override) == 0)
60 return timers[i]->opts;
61 ++i;
62 }
63
64 panic("select_timer: Cannot find a suitable timer\n");
65 return NULL;
66}
67
68int read_current_timer(unsigned long *timer_val)
69{
70 if (cur_timer->read_timer) {
71 *timer_val = cur_timer->read_timer();
72 return 0;
73 }
74 return -1;
75}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
index 13892a65c941..000000000000
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
1/* Cyclone-timer:
2 * This code implements timer_ops for the cyclone counter found
3 * on IBM x440, x360, and other Summit based systems.
4 *
5 * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
6 */
7
8
9#include <linux/spinlock.h>
10#include <linux/init.h>
11#include <linux/timex.h>
12#include <linux/errno.h>
13#include <linux/string.h>
14#include <linux/jiffies.h>
15
16#include <asm/timer.h>
17#include <asm/io.h>
18#include <asm/pgtable.h>
19#include <asm/fixmap.h>
20#include <asm/i8253.h>
21
22#include "io_ports.h"
23
24/* Number of usecs that the last interrupt was delayed */
25static int delay_at_last_interrupt;
26
27#define CYCLONE_CBAR_ADDR 0xFEB00CD0
28#define CYCLONE_PMCC_OFFSET 0x51A0
29#define CYCLONE_MPMC_OFFSET 0x51D0
30#define CYCLONE_MPCS_OFFSET 0x51A8
31#define CYCLONE_TIMER_FREQ 100000000
32#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
33int use_cyclone = 0;
34
35static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
36static u32 last_cyclone_low;
37static u32 last_cyclone_high;
38static unsigned long long monotonic_base;
39static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
40
41/* helper macro to atomically read both cyclone counter registers */
42#define read_cyclone_counter(low,high) \
43 do{ \
44 high = cyclone_timer[1]; low = cyclone_timer[0]; \
45 } while (high != cyclone_timer[1]);
46
47
48static void mark_offset_cyclone(void)
49{
50 unsigned long lost, delay;
51 unsigned long delta = last_cyclone_low;
52 int count;
53 unsigned long long this_offset, last_offset;
54
55 write_seqlock(&monotonic_lock);
56 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
57
58 spin_lock(&i8253_lock);
59 read_cyclone_counter(last_cyclone_low,last_cyclone_high);
60
61 /* read values for delay_at_last_interrupt */
62 outb_p(0x00, 0x43); /* latch the count ASAP */
63
64 count = inb_p(0x40); /* read the latched count */
65 count |= inb(0x40) << 8;
66
67 /*
68 * VIA686a test code... reset the latch if count > max + 1
69 * from timer_pit.c - cjb
70 */
71 if (count > LATCH) {
72 outb_p(0x34, PIT_MODE);
73 outb_p(LATCH & 0xff, PIT_CH0);
74 outb(LATCH >> 8, PIT_CH0);
75 count = LATCH - 1;
76 }
77 spin_unlock(&i8253_lock);
78
79 /* lost tick compensation */
80 delta = last_cyclone_low - delta;
81 delta /= (CYCLONE_TIMER_FREQ/1000000);
82 delta += delay_at_last_interrupt;
83 lost = delta/(1000000/HZ);
84 delay = delta%(1000000/HZ);
85 if (lost >= 2)
86 jiffies_64 += lost-1;
87
88 /* update the monotonic base value */
89 this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
90 monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
91 write_sequnlock(&monotonic_lock);
92
93 /* calculate delay_at_last_interrupt */
94 count = ((LATCH-1) - count) * TICK_SIZE;
95 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
96
97
98 /* catch corner case where tick rollover occured
99 * between cyclone and pit reads (as noted when
100 * usec delta is > 90% # of usecs/tick)
101 */
102 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
103 jiffies_64++;
104}
105
106static unsigned long get_offset_cyclone(void)
107{
108 u32 offset;
109
110 if(!cyclone_timer)
111 return delay_at_last_interrupt;
112
113 /* Read the cyclone timer */
114 offset = cyclone_timer[0];
115
116 /* .. relative to previous jiffy */
117 offset = offset - last_cyclone_low;
118
119 /* convert cyclone ticks to microseconds */
120 /* XXX slow, can we speed this up? */
121 offset = offset/(CYCLONE_TIMER_FREQ/1000000);
122
123 /* our adjusted time offset in microseconds */
124 return delay_at_last_interrupt + offset;
125}
126
127static unsigned long long monotonic_clock_cyclone(void)
128{
129 u32 now_low, now_high;
130 unsigned long long last_offset, this_offset, base;
131 unsigned long long ret;
132 unsigned seq;
133
134 /* atomically read monotonic base & last_offset */
135 do {
136 seq = read_seqbegin(&monotonic_lock);
137 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
138 base = monotonic_base;
139 } while (read_seqretry(&monotonic_lock, seq));
140
141
142 /* Read the cyclone counter */
143 read_cyclone_counter(now_low,now_high);
144 this_offset = ((unsigned long long)now_high<<32)|now_low;
145
146 /* convert to nanoseconds */
147 ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
148 return ret * (1000000000 / CYCLONE_TIMER_FREQ);
149}
150
151static int __init init_cyclone(char* override)
152{
153 u32* reg;
154 u32 base; /* saved cyclone base address */
155 u32 pageaddr; /* page that contains cyclone_timer register */
156 u32 offset; /* offset from pageaddr to cyclone_timer register */
157 int i;
158
159 /* check clock override */
160 if (override[0] && strncmp(override,"cyclone",7))
161 return -ENODEV;
162
163 /*make sure we're on a summit box*/
164 if(!use_cyclone) return -ENODEV;
165
166 printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
167
168 /* find base address */
169 pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
170 offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
171 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
172 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
173 if(!reg){
174 printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
175 return -ENODEV;
176 }
177 base = *reg;
178 if(!base){
179 printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
180 return -ENODEV;
181 }
182
183 /* setup PMCC */
184 pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
185 offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
186 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
187 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
188 if(!reg){
189 printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
190 return -ENODEV;
191 }
192 reg[0] = 0x00000001;
193
194 /* setup MPCS */
195 pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
196 offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
197 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
198 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
199 if(!reg){
200 printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
201 return -ENODEV;
202 }
203 reg[0] = 0x00000001;
204
205 /* map in cyclone_timer */
206 pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
207 offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
208 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
209 cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
210 if(!cyclone_timer){
211 printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
212 return -ENODEV;
213 }
214
215 /*quick test to make sure its ticking*/
216 for(i=0; i<3; i++){
217 u32 old = cyclone_timer[0];
218 int stall = 100;
219 while(stall--) barrier();
220 if(cyclone_timer[0] == old){
221 printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
222 cyclone_timer = 0;
223 return -ENODEV;
224 }
225 }
226
227 init_cpu_khz();
228
229 /* Everything looks good! */
230 return 0;
231}
232
233
234static void delay_cyclone(unsigned long loops)
235{
236 unsigned long bclock, now;
237 if(!cyclone_timer)
238 return;
239 bclock = cyclone_timer[0];
240 do {
241 rep_nop();
242 now = cyclone_timer[0];
243 } while ((now-bclock) < loops);
244}
245/************************************************************/
246
247/* cyclone timer_opts struct */
248static struct timer_opts timer_cyclone = {
249 .name = "cyclone",
250 .mark_offset = mark_offset_cyclone,
251 .get_offset = get_offset_cyclone,
252 .monotonic_clock = monotonic_clock_cyclone,
253 .delay = delay_cyclone,
254};
255
256struct init_timer_opts __initdata timer_cyclone_init = {
257 .init = init_cyclone,
258 .opts = &timer_cyclone,
259};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
index 17a6fe7166e7..000000000000
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/init.h>
8#include <linux/timex.h>
9#include <linux/errno.h>
10#include <linux/string.h>
11#include <linux/jiffies.h>
12
13#include <asm/timer.h>
14#include <asm/io.h>
15#include <asm/processor.h>
16
17#include "io_ports.h"
18#include "mach_timer.h"
19#include <asm/hpet.h>
20
21static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
22static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
23static unsigned long hpet_last; /* hpet counter value at last tick*/
24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
25static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
26static unsigned long long monotonic_base;
27static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
28
29/* convert from cycles(64bits) => nanoseconds (64bits)
30 * basic equation:
31 * ns = cycles / (freq / ns_per_sec)
32 * ns = cycles * (ns_per_sec / freq)
33 * ns = cycles * (10^9 / (cpu_khz * 10^3))
34 * ns = cycles * (10^6 / cpu_khz)
35 *
36 * Then we use scaling math (suggested by george@mvista.com) to get:
37 * ns = cycles * (10^6 * SC / cpu_khz) / SC
38 * ns = cycles * cyc2ns_scale / SC
39 *
40 * And since SC is a constant power of two, we can convert the div
41 * into a shift.
42 *
43 * We can use khz divisor instead of mhz to keep a better percision, since
44 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
45 * (mathieu.desnoyers@polymtl.ca)
46 *
47 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
48 */
49static unsigned long cyc2ns_scale __read_mostly;
50#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
51
52static inline void set_cyc2ns_scale(unsigned long cpu_khz)
53{
54 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
55}
56
57static inline unsigned long long cycles_2_ns(unsigned long long cyc)
58{
59 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
60}
61
62static unsigned long long monotonic_clock_hpet(void)
63{
64 unsigned long long last_offset, this_offset, base;
65 unsigned seq;
66
67 /* atomically read monotonic base & last_offset */
68 do {
69 seq = read_seqbegin(&monotonic_lock);
70 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
71 base = monotonic_base;
72 } while (read_seqretry(&monotonic_lock, seq));
73
74 /* Read the Time Stamp Counter */
75 rdtscll(this_offset);
76
77 /* return the value in ns */
78 return base + cycles_2_ns(this_offset - last_offset);
79}
80
81static unsigned long get_offset_hpet(void)
82{
83 register unsigned long eax, edx;
84
85 eax = hpet_readl(HPET_COUNTER);
86 eax -= hpet_last; /* hpet delta */
87 eax = min(hpet_tick, eax);
88 /*
89 * Time offset = (hpet delta) * ( usecs per HPET clock )
90 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
91 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
92 *
93 * Where,
94 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
95 *
96 * Using a mull instead of a divl saves some cycles in critical path.
97 */
98 ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
99
100 /* our adjusted time offset in microseconds */
101 return edx;
102}
103
104static void mark_offset_hpet(void)
105{
106 unsigned long long this_offset, last_offset;
107 unsigned long offset;
108
109 write_seqlock(&monotonic_lock);
110 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
111 rdtsc(last_tsc_low, last_tsc_high);
112
113 if (hpet_use_timer)
114 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
115 else
116 offset = hpet_readl(HPET_COUNTER);
117 if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
118 int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
119 jiffies_64 += lost_ticks;
120 }
121 hpet_last = offset;
122
123 /* update the monotonic base value */
124 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
125 monotonic_base += cycles_2_ns(this_offset - last_offset);
126 write_sequnlock(&monotonic_lock);
127}
128
129static void delay_hpet(unsigned long loops)
130{
131 unsigned long hpet_start, hpet_end;
132 unsigned long eax;
133
134 /* loops is the number of cpu cycles. Convert it to hpet clocks */
135 ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
136
137 hpet_start = hpet_readl(HPET_COUNTER);
138 do {
139 rep_nop();
140 hpet_end = hpet_readl(HPET_COUNTER);
141 } while ((hpet_end - hpet_start) < (loops));
142}
143
144static struct timer_opts timer_hpet;
145
146static int __init init_hpet(char* override)
147{
148 unsigned long result, remain;
149
150 /* check clock override */
151 if (override[0] && strncmp(override,"hpet",4))
152 return -ENODEV;
153
154 if (!is_hpet_enabled())
155 return -ENODEV;
156
157 printk("Using HPET for gettimeofday\n");
158 if (cpu_has_tsc) {
159 unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
160 if (tsc_quotient) {
161 /* report CPU clock rate in Hz.
162 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
163 * clock/second. Our precision is about 100 ppm.
164 */
165 { unsigned long eax=0, edx=1000;
166 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
167 eax, edx);
168 printk("Detected %u.%03u MHz processor.\n",
169 cpu_khz / 1000, cpu_khz % 1000);
170 }
171 set_cyc2ns_scale(cpu_khz);
172 }
173 /* set this only when cpu_has_tsc */
174 timer_hpet.read_timer = read_timer_tsc;
175 }
176
177 /*
178 * Math to calculate hpet to usec multiplier
179 * Look for the comments at get_offset_hpet()
180 */
181 ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
182 if (remain > (hpet_tick >> 1))
183 result++; /* rounding the result */
184 hpet_usec_quotient = result;
185
186 return 0;
187}
188
189static int hpet_resume(void)
190{
191 write_seqlock(&monotonic_lock);
192 /* Assume this is the last mark offset time */
193 rdtsc(last_tsc_low, last_tsc_high);
194
195 if (hpet_use_timer)
196 hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
197 else
198 hpet_last = hpet_readl(HPET_COUNTER);
199 write_sequnlock(&monotonic_lock);
200 return 0;
201}
202/************************************************************/
203
204/* tsc timer_opts struct */
205static struct timer_opts timer_hpet __read_mostly = {
206 .name = "hpet",
207 .mark_offset = mark_offset_hpet,
208 .get_offset = get_offset_hpet,
209 .monotonic_clock = monotonic_clock_hpet,
210 .delay = delay_hpet,
211 .resume = hpet_resume,
212};
213
214struct init_timer_opts __initdata timer_hpet_init = {
215 .init = init_hpet,
216 .opts = &timer_hpet,
217};
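The cyc2ns comment near the top of the deleted timer_hpet.c spells out the cycles-to-nanoseconds scaling: precompute (10^6 << SC) / cpu_khz once, then convert with a multiply and a shift. A minimal standalone sketch of that math (not part of the patch; the cpu_khz value is an assumed example):

#include <stdint.h>
#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10   /* SC = 2^10 */

static uint32_t cyc2ns_scale;

static void set_cyc2ns_scale(uint32_t cpu_khz)
{
	cyc2ns_scale = (1000000u << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static uint64_t cycles_2_ns(uint64_t cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
	set_cyc2ns_scale(2000000);          /* 2 GHz CPU (example) */
	printf("%llu cycles ~= %llu ns\n",
	       2000000ULL, (unsigned long long)cycles_2_ns(2000000ULL));
	return 0;
}

For the 2 GHz example the scale works out to 512, so 2,000,000 cycles map to 1,000,000 ns, i.e. one millisecond.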
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
index 4ea2f414dbbd..000000000000
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
1#include <linux/init.h>
2#include <asm/timer.h>
3
4static void mark_offset_none(void)
5{
6 /* nothing needed */
7}
8
9static unsigned long get_offset_none(void)
10{
11 return 0;
12}
13
14static unsigned long long monotonic_clock_none(void)
15{
16 return 0;
17}
18
19static void delay_none(unsigned long loops)
20{
21 int d0;
22 __asm__ __volatile__(
23 "\tjmp 1f\n"
24 ".align 16\n"
25 "1:\tjmp 2f\n"
26 ".align 16\n"
27 "2:\tdecl %0\n\tjns 2b"
28 :"=&a" (d0)
29 :"0" (loops));
30}
31
32/* none timer_opts struct */
33struct timer_opts timer_none = {
34 .name = "none",
35 .mark_offset = mark_offset_none,
36 .get_offset = get_offset_none,
37 .monotonic_clock = monotonic_clock_none,
38 .delay = delay_none,
39};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
index b9b6bd56b9ba..000000000000
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/module.h>
8#include <linux/device.h>
9#include <linux/sysdev.h>
10#include <linux/timex.h>
11#include <asm/delay.h>
12#include <asm/mpspec.h>
13#include <asm/timer.h>
14#include <asm/smp.h>
15#include <asm/io.h>
16#include <asm/arch_hooks.h>
17#include <asm/i8253.h>
18
19#include "do_timer.h"
20#include "io_ports.h"
21
22static int count_p; /* counter in get_offset_pit() */
23
24static int __init init_pit(char* override)
25{
26 /* check clock override */
27 if (override[0] && strncmp(override,"pit",3))
28 printk(KERN_ERR "Warning: clock= override failed. Defaulting "
29 "to PIT\n");
30 init_cpu_khz();
31 count_p = LATCH;
32 return 0;
33}
34
35static void mark_offset_pit(void)
36{
37 /* nothing needed */
38}
39
40static unsigned long long monotonic_clock_pit(void)
41{
42 return 0;
43}
44
45static void delay_pit(unsigned long loops)
46{
47 int d0;
48 __asm__ __volatile__(
49 "\tjmp 1f\n"
50 ".align 16\n"
51 "1:\tjmp 2f\n"
52 ".align 16\n"
53 "2:\tdecl %0\n\tjns 2b"
54 :"=&a" (d0)
55 :"0" (loops));
56}
57
58
59/* This function must be called with xtime_lock held.
60 * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
61 *
62 * However, the pc-audio speaker driver changes the divisor so that
63 * it gets interrupted rather more often - it loads 64 into the
64 * counter rather than 11932! This has an adverse impact on
65 * do_gettimeoffset() -- it stops working! What is also not
66 * good is that the interval that our timer function gets called
67 * is no longer 10.0002 ms, but 9.9767 ms. To get around this
68 * would require using a different timing source. Maybe someone
69 * could use the RTC - I know that this can interrupt at frequencies
70 * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
71 * it so that at startup, the timer code in sched.c would select
72 * using either the RTC or the 8253 timer. The decision would be
73 * based on whether there was any other device around that needed
74 * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
75 * and then do some jiggery to have a version of do_timer that
76 * advanced the clock by 1/1024 s. Every time that reached over 1/100
77 * of a second, then do all the old code. If the time was kept correct
78 * then do_gettimeoffset could just return 0 - there is no low order
79 * divider that can be accessed.
80 *
81 * Ideally, you would be able to use the RTC for the speaker driver,
82 * but it appears that the speaker driver really needs interrupt more
83 * often than every 120 us or so.
84 *
85 * Anyway, this needs more thought.... pjsg (1993-08-28)
86 *
87 * If you are really that interested, you should be reading
88 * comp.protocols.time.ntp!
89 */
90
91static unsigned long get_offset_pit(void)
92{
93 int count;
94 unsigned long flags;
95 static unsigned long jiffies_p = 0;
96
97 /*
98 * cache volatile jiffies temporarily; we have xtime_lock.
99 */
100 unsigned long jiffies_t;
101
102 spin_lock_irqsave(&i8253_lock, flags);
103 /* timer count may underflow right here */
104 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
105
106 count = inb_p(PIT_CH0); /* read the latched count */
107
108 /*
109 * We do this guaranteed double memory access instead of a _p
110 * postfix in the previous port access. Wheee, hackady hack
111 */
112 jiffies_t = jiffies;
113
114 count |= inb_p(PIT_CH0) << 8;
115
116 /* VIA686a test code... reset the latch if count > max + 1 */
117 if (count > LATCH) {
118 outb_p(0x34, PIT_MODE);
119 outb_p(LATCH & 0xff, PIT_CH0);
120 outb(LATCH >> 8, PIT_CH0);
121 count = LATCH - 1;
122 }
123
124 /*
125 * avoiding timer inconsistencies (they are rare, but they happen)...
126 * there are two kinds of problems that must be avoided here:
127 * 1. the timer counter underflows
128 * 2. hardware problem with the timer, not giving us continuous time,
129 * the counter does small "jumps" upwards on some Pentium systems,
130 * (see c't 95/10 page 335 for Neptun bug.)
131 */
132
133 if( jiffies_t == jiffies_p ) {
134 if( count > count_p ) {
135 /* the nutcase */
136 count = do_timer_overflow(count);
137 }
138 } else
139 jiffies_p = jiffies_t;
140
141 count_p = count;
142
143 spin_unlock_irqrestore(&i8253_lock, flags);
144
145 count = ((LATCH-1) - count) * TICK_SIZE;
146 count = (count + LATCH/2) / LATCH;
147
148 return count;
149}
150
151
152 /* pit timer_opts struct */
153struct timer_opts timer_pit = {
154 .name = "pit",
155 .mark_offset = mark_offset_pit,
156 .get_offset = get_offset_pit,
157 .monotonic_clock = monotonic_clock_pit,
158 .delay = delay_pit,
159};
160
161struct init_timer_opts __initdata timer_pit_init = {
162 .init = init_pit,
163 .opts = &timer_pit,
164};
165
166void setup_pit_timer(void)
167{
168 unsigned long flags;
169
170 spin_lock_irqsave(&i8253_lock, flags);
171 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
172 udelay(10);
173 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
174 udelay(10);
175 outb(LATCH >> 8 , PIT_CH0); /* MSB */
176 spin_unlock_irqrestore(&i8253_lock, flags);
177}
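
The heart of the deleted get_offset_pit() is the conversion from the latched PIT count (which counts down from LATCH once per tick) to microseconds since the last timer interrupt. A stand-alone sketch of that arithmetic follows; the constants are the usual i386 values and are assumptions here, not taken from this patch:

#include <stdio.h>

#define CLOCK_TICK_RATE	1193182				/* PIT input clock in Hz */
#define HZ		100
#define LATCH		((CLOCK_TICK_RATE + HZ/2) / HZ)	/* PIT counts per tick */
#define TICK_SIZE	(1000000 / HZ)			/* usecs per tick */

/* counts elapsed since the tick, scaled to microseconds with rounding */
static unsigned long pit_count_to_usecs(int count)
{
	unsigned long usecs = ((LATCH - 1) - count) * TICK_SIZE;

	return (usecs + LATCH / 2) / LATCH;
}

int main(void)
{
	/* halfway through a 10 ms tick the counter has fallen to about LATCH/2 */
	printf("%lu us\n", pit_count_to_usecs(LATCH / 2));	/* prints ~5000 */
	return 0;
}
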
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
index 144e94a04933..000000000000
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,342 +0,0 @@
1/*
2 * (C) Dominik Brodowski <linux@brodo.de> 2003
3 *
4 * Driver to use the Power Management Timer (PMTMR) available in some
5 * southbridges as primary timing source for the Linux kernel.
6 *
7 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
8 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
9 *
10 * This file is licensed under the GPL v2.
11 */
12
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/device.h>
17#include <linux/init.h>
18#include <linux/pci.h>
19#include <asm/types.h>
20#include <asm/timer.h>
21#include <asm/smp.h>
22#include <asm/io.h>
23#include <asm/arch_hooks.h>
24
25#include <linux/timex.h>
26#include "mach_timer.h"
27
28/* Number of PMTMR ticks expected during calibration run */
29#define PMTMR_TICKS_PER_SEC 3579545
30#define PMTMR_EXPECTED_RATE \
31 ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
32
33
34/* The I/O port the PMTMR resides at.
35 * The location is detected during setup_arch(),
36 * in arch/i386/acpi/boot.c */
37u32 pmtmr_ioport = 0;
38
39
40/* value of the Power timer at last timer interrupt */
41static u32 offset_tick;
42static u32 offset_delay;
43
44static unsigned long long monotonic_base;
45static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
46
47#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
48
49static int pmtmr_need_workaround __read_mostly = 1;
50
51/*helper function to safely read acpi pm timesource*/
52static inline u32 read_pmtmr(void)
53{
54 if (pmtmr_need_workaround) {
55 u32 v1, v2, v3;
56
57 /* It has been reported that because of various broken
58 * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
59 * source is not latched, so you must read it multiple
60 * times to insure a safe value is read.
61 */
62 do {
63 v1 = inl(pmtmr_ioport);
64 v2 = inl(pmtmr_ioport);
65 v3 = inl(pmtmr_ioport);
66 } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
67 || (v3 > v1 && v3 < v2));
68
69 /* mask the output to 24 bits */
70 return v2 & ACPI_PM_MASK;
71 }
72
73 return inl(pmtmr_ioport) & ACPI_PM_MASK;
74}
75
76
77/*
78 * Some boards have the PMTMR running way too fast. We check
79 * the PMTMR rate against PIT channel 2 to catch these cases.
80 */
81static int verify_pmtmr_rate(void)
82{
83 u32 value1, value2;
84 unsigned long count, delta;
85
86 mach_prepare_counter();
87 value1 = read_pmtmr();
88 mach_countup(&count);
89 value2 = read_pmtmr();
90 delta = (value2 - value1) & ACPI_PM_MASK;
91
92 /* Check that the PMTMR delta is within 5% of what we expect */
93 if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
94 delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
95 printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
96 return -1;
97 }
98
99 return 0;
100}
101
102
103static int init_pmtmr(char* override)
104{
105 u32 value1, value2;
106 unsigned int i;
107
108 if (override[0] && strncmp(override,"pmtmr",5))
109 return -ENODEV;
110
111 if (!pmtmr_ioport)
112 return -ENODEV;
113
114 /* we use the TSC for delay_pmtmr, so make sure it exists */
115 if (!cpu_has_tsc)
116 return -ENODEV;
117
118 /* "verify" this timing source */
119 value1 = read_pmtmr();
120 for (i = 0; i < 10000; i++) {
121 value2 = read_pmtmr();
122 if (value2 == value1)
123 continue;
124 if (value2 > value1)
125 goto pm_good;
126 if ((value2 < value1) && ((value2) < 0xFFF))
127 goto pm_good;
128 printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
129 return -EINVAL;
130 }
131 printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
132 return -ENODEV;
133
134pm_good:
135 if (verify_pmtmr_rate() != 0)
136 return -ENODEV;
137
138 init_cpu_khz();
139 return 0;
140}
141
142static inline u32 cyc2us(u32 cycles)
143{
144 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
145 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
146 *
147 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
148 * easily be multiplied with 286 (=0x11E) without having to fear
149 * u32 overflows.
150 */
151 cycles *= 286;
152 return (cycles >> 10);
153}
154
155/*
156 * this gets called during each timer interrupt
157 * - Called while holding the writer xtime_lock
158 */
159static void mark_offset_pmtmr(void)
160{
161 u32 lost, delta, last_offset;
162 static int first_run = 1;
163 last_offset = offset_tick;
164
165 write_seqlock(&monotonic_lock);
166
167 offset_tick = read_pmtmr();
168
169 /* calculate tick interval */
170 delta = (offset_tick - last_offset) & ACPI_PM_MASK;
171
172 /* convert to usecs */
173 delta = cyc2us(delta);
174
175 /* update the monotonic base value */
176 monotonic_base += delta * NSEC_PER_USEC;
177 write_sequnlock(&monotonic_lock);
178
179 /* convert to ticks */
180 delta += offset_delay;
181 lost = delta / (USEC_PER_SEC / HZ);
182 offset_delay = delta % (USEC_PER_SEC / HZ);
183
184
185 /* compensate for lost ticks */
186 if (lost >= 2)
187 jiffies_64 += lost - 1;
188
189 /* don't calculate delay for first run,
190 or if we've got less then a tick */
191 if (first_run || (lost < 1)) {
192 first_run = 0;
193 offset_delay = 0;
194 }
195}
196
197static int pmtmr_resume(void)
198{
199 write_seqlock(&monotonic_lock);
200 /* Assume this is the last mark offset time */
201 offset_tick = read_pmtmr();
202 write_sequnlock(&monotonic_lock);
203 return 0;
204}
205
206static unsigned long long monotonic_clock_pmtmr(void)
207{
208 u32 last_offset, this_offset;
209 unsigned long long base, ret;
210 unsigned seq;
211
212
213 /* atomically read monotonic base & last_offset */
214 do {
215 seq = read_seqbegin(&monotonic_lock);
216 last_offset = offset_tick;
217 base = monotonic_base;
218 } while (read_seqretry(&monotonic_lock, seq));
219
220 /* Read the pmtmr */
221 this_offset = read_pmtmr();
222
223 /* convert to nanoseconds */
224 ret = (this_offset - last_offset) & ACPI_PM_MASK;
225 ret = base + (cyc2us(ret) * NSEC_PER_USEC);
226 return ret;
227}
228
229static void delay_pmtmr(unsigned long loops)
230{
231 unsigned long bclock, now;
232
233 rdtscl(bclock);
234 do
235 {
236 rep_nop();
237 rdtscl(now);
238 } while ((now-bclock) < loops);
239}
240
241
242/*
243 * get the offset (in microseconds) from the last call to mark_offset()
244 * - Called holding a reader xtime_lock
245 */
246static unsigned long get_offset_pmtmr(void)
247{
248 u32 now, offset, delta = 0;
249
250 offset = offset_tick;
251 now = read_pmtmr();
252 delta = (now - offset)&ACPI_PM_MASK;
253
254 return (unsigned long) offset_delay + cyc2us(delta);
255}
256
257
258/* acpi timer_opts struct */
259static struct timer_opts timer_pmtmr = {
260 .name = "pmtmr",
261 .mark_offset = mark_offset_pmtmr,
262 .get_offset = get_offset_pmtmr,
263 .monotonic_clock = monotonic_clock_pmtmr,
264 .delay = delay_pmtmr,
265 .read_timer = read_timer_tsc,
266 .resume = pmtmr_resume,
267};
268
269struct init_timer_opts __initdata timer_pmtmr_init = {
270 .init = init_pmtmr,
271 .opts = &timer_pmtmr,
272};
273
274#ifdef CONFIG_PCI
275/*
276 * PIIX4 Errata:
277 *
278 * The power management timer may return improper results when read.
279 * Although the timer value settles properly after incrementing,
280 * while incrementing there is a 3 ns window every 69.8 ns where the
281 * timer value is indeterminate (a 4.2% chance that the data will be
282 * incorrect when read). As a result, the ACPI free running count up
283 * timer specification is violated due to erroneous reads.
284 */
285static int __init pmtmr_bug_check(void)
286{
287 static struct pci_device_id gray_list[] __initdata = {
288 /* these chipsets may have bug. */
289 { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
290 PCI_DEVICE_ID_INTEL_82801DB_0) },
291 { },
292 };
293 struct pci_dev *dev;
294 int pmtmr_has_bug = 0;
295 u8 rev;
296
297 if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
298 return 0;
299
300 dev = pci_get_device(PCI_VENDOR_ID_INTEL,
301 PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
302 if (dev) {
303 pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
304 /* the bug has been fixed in PIIX4M */
305 if (rev < 3) {
306 printk(KERN_WARNING "* Found PM-Timer Bug on this "
307 "chipset. Due to workarounds for a bug,\n"
308 "* this time source is slow. Consider trying "
309 "other time sources (clock=)\n");
310 pmtmr_has_bug = 1;
311 }
312 pci_dev_put(dev);
313 }
314
315 if (pci_dev_present(gray_list)) {
316 printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
317 " to workarounds for a bug,\n"
318 "* this time source is slow. If you are sure your timer"
319 " does not have\n"
320 "* this bug, please use \"pmtmr_good\" to disable the "
321 "workaround\n");
322 pmtmr_has_bug = 1;
323 }
324
325 if (!pmtmr_has_bug)
326 pmtmr_need_workaround = 0;
327
328 return 0;
329}
330device_initcall(pmtmr_bug_check);
331#endif
332
333static int __init pmtr_good_setup(char *__str)
334{
335 pmtmr_need_workaround = 0;
336 return 1;
337}
338__setup("pmtmr_good", pmtr_good_setup);
339
340MODULE_LICENSE("GPL");
341MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
342MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
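
Two details of the deleted PM-timer code are easy to miss: the counter is only 24 bits wide, so every delta is masked with ACPI_PM_MASK to survive wrap-around, and cyc2us() avoids a division by 3.579545 by multiplying by 286 and shifting right by 10 (about 0.024% error, as its comment notes). A stand-alone sketch of the elapsed-time calculation, taking the two counter samples as parameters instead of doing port I/O:

#include <stdint.h>

#define ACPI_PM_MASK	0xFFFFFFu	/* the PM timer is a 24-bit counter */

/* microseconds between two PM-timer samples; wrap-safe because the
 * subtraction is taken modulo 2^24, and 286/1024 ~= 1/3.579545 */
static uint32_t pm_elapsed_usecs(uint32_t last, uint32_t now)
{
	uint32_t delta = (now - last) & ACPI_PM_MASK;

	return (delta * 286u) >> 10;
}

The triple-read loop in read_pmtmr() is a separate workaround: on chipsets whose timer value is not latched it keeps re-reading until three back-to-back reads are mutually consistent, then returns the middle one.
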
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
index f1187ddb0d0f..000000000000
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,617 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 *
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 * failing to inline.
8 */
9
10#include <linux/spinlock.h>
11#include <linux/init.h>
12#include <linux/timex.h>
13#include <linux/errno.h>
14#include <linux/cpufreq.h>
15#include <linux/string.h>
16#include <linux/jiffies.h>
17
18#include <asm/timer.h>
19#include <asm/io.h>
20/* processor.h for distable_tsc flag */
21#include <asm/processor.h>
22
23#include "io_ports.h"
24#include "mach_timer.h"
25
26#include <asm/hpet.h>
27#include <asm/i8253.h>
28
29#ifdef CONFIG_HPET_TIMER
30static unsigned long hpet_usec_quotient;
31static unsigned long hpet_last;
32static struct timer_opts timer_tsc;
33#endif
34
35static inline void cpufreq_delayed_get(void);
36
37int tsc_disable __devinitdata = 0;
38
39static int use_tsc;
40/* Number of usecs that the last interrupt was delayed */
41static int delay_at_last_interrupt;
42
43static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
45static unsigned long long monotonic_base;
46static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47
48/* Avoid compensating for lost ticks before TSCs are synched */
49static int detect_lost_ticks;
50static int __init start_lost_tick_compensation(void)
51{
52 detect_lost_ticks = 1;
53 return 0;
54}
55late_initcall(start_lost_tick_compensation);
56
57/* convert from cycles(64bits) => nanoseconds (64bits)
58 * basic equation:
59 * ns = cycles / (freq / ns_per_sec)
60 * ns = cycles * (ns_per_sec / freq)
61 * ns = cycles * (10^9 / (cpu_khz * 10^3))
62 * ns = cycles * (10^6 / cpu_khz)
63 *
64 * Then we use scaling math (suggested by george@mvista.com) to get:
65 * ns = cycles * (10^6 * SC / cpu_khz) / SC
66 * ns = cycles * cyc2ns_scale / SC
67 *
68 * And since SC is a constant power of two, we can convert the div
69 * into a shift.
70 *
71 * We can use khz divisor instead of mhz to keep a better percision, since
72 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
73 * (mathieu.desnoyers@polymtl.ca)
74 *
75 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
76 */
77static unsigned long cyc2ns_scale __read_mostly;
78#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
79
80static inline void set_cyc2ns_scale(unsigned long cpu_khz)
81{
82 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
83}
84
85static inline unsigned long long cycles_2_ns(unsigned long long cyc)
86{
87 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
88}
89
90static int count2; /* counter for mark_offset_tsc() */
91
92/* Cached *multiplier* to convert TSC counts to microseconds.
93 * (see the equation below).
94 * Equal to 2^32 * (1 / (clocks per usec) ).
95 * Initialized in time_init.
96 */
97static unsigned long fast_gettimeoffset_quotient;
98
99static unsigned long get_offset_tsc(void)
100{
101 register unsigned long eax, edx;
102
103 /* Read the Time Stamp Counter */
104
105 rdtsc(eax,edx);
106
107 /* .. relative to previous jiffy (32 bits is enough) */
108 eax -= last_tsc_low; /* tsc_low delta */
109
110 /*
111 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
112 * = (tsc_low delta) * (usecs_per_clock)
113 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
114 *
115 * Using a mull instead of a divl saves up to 31 clock cycles
116 * in the critical path.
117 */
118
119 __asm__("mull %2"
120 :"=a" (eax), "=d" (edx)
121 :"rm" (fast_gettimeoffset_quotient),
122 "0" (eax));
123
124 /* our adjusted time offset in microseconds */
125 return delay_at_last_interrupt + edx;
126}
127
128static unsigned long long monotonic_clock_tsc(void)
129{
130 unsigned long long last_offset, this_offset, base;
131 unsigned seq;
132
133 /* atomically read monotonic base & last_offset */
134 do {
135 seq = read_seqbegin(&monotonic_lock);
136 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
137 base = monotonic_base;
138 } while (read_seqretry(&monotonic_lock, seq));
139
140 /* Read the Time Stamp Counter */
141 rdtscll(this_offset);
142
143 /* return the value in ns */
144 return base + cycles_2_ns(this_offset - last_offset);
145}
146
147/*
148 * Scheduler clock - returns current time in nanosec units.
149 */
150unsigned long long sched_clock(void)
151{
152 unsigned long long this_offset;
153
154 /*
155 * In the NUMA case we dont use the TSC as they are not
156 * synchronized across all CPUs.
157 */
158#ifndef CONFIG_NUMA
159 if (!use_tsc)
160#endif
161 /* no locking but a rare wrong value is not a big deal */
162 return jiffies_64 * (1000000000 / HZ);
163
164 /* Read the Time Stamp Counter */
165 rdtscll(this_offset);
166
167 /* return the value in ns */
168 return cycles_2_ns(this_offset);
169}
170
171static void delay_tsc(unsigned long loops)
172{
173 unsigned long bclock, now;
174
175 rdtscl(bclock);
176 do
177 {
178 rep_nop();
179 rdtscl(now);
180 } while ((now-bclock) < loops);
181}
182
183#ifdef CONFIG_HPET_TIMER
184static void mark_offset_tsc_hpet(void)
185{
186 unsigned long long this_offset, last_offset;
187 unsigned long offset, temp, hpet_current;
188
189 write_seqlock(&monotonic_lock);
190 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
191 /*
192 * It is important that these two operations happen almost at
193 * the same time. We do the RDTSC stuff first, since it's
194 * faster. To avoid any inconsistencies, we need interrupts
195 * disabled locally.
196 */
197 /*
198 * Interrupts are just disabled locally since the timer irq
199 * has the SA_INTERRUPT flag set. -arca
200 */
201 /* read Pentium cycle counter */
202
203 hpet_current = hpet_readl(HPET_COUNTER);
204 rdtsc(last_tsc_low, last_tsc_high);
205
206 /* lost tick compensation */
207 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
208 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
209 && detect_lost_ticks) {
210 int lost_ticks = (offset - hpet_last) / hpet_tick;
211 jiffies_64 += lost_ticks;
212 }
213 hpet_last = hpet_current;
214
215 /* update the monotonic base value */
216 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
217 monotonic_base += cycles_2_ns(this_offset - last_offset);
218 write_sequnlock(&monotonic_lock);
219
220 /* calculate delay_at_last_interrupt */
221 /*
222 * Time offset = (hpet delta) * ( usecs per HPET clock )
223 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
224 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
225 * Where,
226 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
227 */
228 delay_at_last_interrupt = hpet_current - offset;
229 ASM_MUL64_REG(temp, delay_at_last_interrupt,
230 hpet_usec_quotient, delay_at_last_interrupt);
231}
232#endif
233
234
235#ifdef CONFIG_CPU_FREQ
236#include <linux/workqueue.h>
237
238static unsigned int cpufreq_delayed_issched = 0;
239static unsigned int cpufreq_init = 0;
240static struct work_struct cpufreq_delayed_get_work;
241
242static void handle_cpufreq_delayed_get(void *v)
243{
244 unsigned int cpu;
245 for_each_online_cpu(cpu) {
246 cpufreq_get(cpu);
247 }
248 cpufreq_delayed_issched = 0;
249}
250
251/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
252 * to verify the CPU frequency the timing core thinks the CPU is running
253 * at is still correct.
254 */
255static inline void cpufreq_delayed_get(void)
256{
257 if (cpufreq_init && !cpufreq_delayed_issched) {
258 cpufreq_delayed_issched = 1;
259 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
260 schedule_work(&cpufreq_delayed_get_work);
261 }
262}
263
264/* If the CPU frequency is scaled, TSC-based delays will need a different
265 * loops_per_jiffy value to function properly.
266 */
267
268static unsigned int ref_freq = 0;
269static unsigned long loops_per_jiffy_ref = 0;
270
271#ifndef CONFIG_SMP
272static unsigned long fast_gettimeoffset_ref = 0;
273static unsigned int cpu_khz_ref = 0;
274#endif
275
276static int
277time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
278 void *data)
279{
280 struct cpufreq_freqs *freq = data;
281
282 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
283 write_seqlock_irq(&xtime_lock);
284 if (!ref_freq) {
285 if (!freq->old){
286 ref_freq = freq->new;
287 goto end;
288 }
289 ref_freq = freq->old;
290 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
291#ifndef CONFIG_SMP
292 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
293 cpu_khz_ref = cpu_khz;
294#endif
295 }
296
297 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
298 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
299 (val == CPUFREQ_RESUMECHANGE)) {
300 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
301 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
302#ifndef CONFIG_SMP
303 if (cpu_khz)
304 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
305 if (use_tsc) {
306 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
307 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
308 set_cyc2ns_scale(cpu_khz);
309 }
310 }
311#endif
312 }
313
314end:
315 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
316 write_sequnlock_irq(&xtime_lock);
317
318 return 0;
319}
320
321static struct notifier_block time_cpufreq_notifier_block = {
322 .notifier_call = time_cpufreq_notifier
323};
324
325
326static int __init cpufreq_tsc(void)
327{
328 int ret;
329 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
330 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
331 CPUFREQ_TRANSITION_NOTIFIER);
332 if (!ret)
333 cpufreq_init = 1;
334 return ret;
335}
336core_initcall(cpufreq_tsc);
337
338#else /* CONFIG_CPU_FREQ */
339static inline void cpufreq_delayed_get(void) { return; }
340#endif
341
342int recalibrate_cpu_khz(void)
343{
344#ifndef CONFIG_SMP
345 unsigned int cpu_khz_old = cpu_khz;
346
347 if (cpu_has_tsc) {
348 local_irq_disable();
349 init_cpu_khz();
350 local_irq_enable();
351 cpu_data[0].loops_per_jiffy =
352 cpufreq_scale(cpu_data[0].loops_per_jiffy,
353 cpu_khz_old,
354 cpu_khz);
355 return 0;
356 } else
357 return -ENODEV;
358#else
359 return -ENODEV;
360#endif
361}
362EXPORT_SYMBOL(recalibrate_cpu_khz);
363
364static void mark_offset_tsc(void)
365{
366 unsigned long lost,delay;
367 unsigned long delta = last_tsc_low;
368 int count;
369 int countmp;
370 static int count1 = 0;
371 unsigned long long this_offset, last_offset;
372 static int lost_count = 0;
373
374 write_seqlock(&monotonic_lock);
375 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
376 /*
377 * It is important that these two operations happen almost at
378 * the same time. We do the RDTSC stuff first, since it's
379 * faster. To avoid any inconsistencies, we need interrupts
380 * disabled locally.
381 */
382
383 /*
384 * Interrupts are just disabled locally since the timer irq
385 * has the SA_INTERRUPT flag set. -arca
386 */
387
388 /* read Pentium cycle counter */
389
390 rdtsc(last_tsc_low, last_tsc_high);
391
392 spin_lock(&i8253_lock);
393 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
394
395 count = inb_p(PIT_CH0); /* read the latched count */
396 count |= inb(PIT_CH0) << 8;
397
398 /*
399 * VIA686a test code... reset the latch if count > max + 1
400 * from timer_pit.c - cjb
401 */
402 if (count > LATCH) {
403 outb_p(0x34, PIT_MODE);
404 outb_p(LATCH & 0xff, PIT_CH0);
405 outb(LATCH >> 8, PIT_CH0);
406 count = LATCH - 1;
407 }
408
409 spin_unlock(&i8253_lock);
410
411 if (pit_latch_buggy) {
412 /* get center value of last 3 time lutch */
413 if ((count2 >= count && count >= count1)
414 || (count1 >= count && count >= count2)) {
415 count2 = count1; count1 = count;
416 } else if ((count1 >= count2 && count2 >= count)
417 || (count >= count2 && count2 >= count1)) {
418 countmp = count;count = count2;
419 count2 = count1;count1 = countmp;
420 } else {
421 count2 = count1; count1 = count; count = count1;
422 }
423 }
424
425 /* lost tick compensation */
426 delta = last_tsc_low - delta;
427 {
428 register unsigned long eax, edx;
429 eax = delta;
430 __asm__("mull %2"
431 :"=a" (eax), "=d" (edx)
432 :"rm" (fast_gettimeoffset_quotient),
433 "0" (eax));
434 delta = edx;
435 }
436 delta += delay_at_last_interrupt;
437 lost = delta/(1000000/HZ);
438 delay = delta%(1000000/HZ);
439 if (lost >= 2 && detect_lost_ticks) {
440 jiffies_64 += lost-1;
441
442 /* sanity check to ensure we're not always losing ticks */
443 if (lost_count++ > 100) {
444 printk(KERN_WARNING "Losing too many ticks!\n");
445 printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
446 printk(KERN_WARNING "Possible reasons for this are:\n");
447 printk(KERN_WARNING " You're running with Speedstep,\n");
448 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
449 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
450 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
451
452 clock_fallback();
453 }
454 /* ... but give the TSC a fair chance */
455 if (lost_count > 25)
456 cpufreq_delayed_get();
457 } else
458 lost_count = 0;
459 /* update the monotonic base value */
460 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
461 monotonic_base += cycles_2_ns(this_offset - last_offset);
462 write_sequnlock(&monotonic_lock);
463
464 /* calculate delay_at_last_interrupt */
465 count = ((LATCH-1) - count) * TICK_SIZE;
466 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
467
468 /* catch corner case where tick rollover occured
469 * between tsc and pit reads (as noted when
470 * usec delta is > 90% # of usecs/tick)
471 */
472 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
473 jiffies_64++;
474}
475
476static int __init init_tsc(char* override)
477{
478
479 /* check clock override */
480 if (override[0] && strncmp(override,"tsc",3)) {
481#ifdef CONFIG_HPET_TIMER
482 if (is_hpet_enabled()) {
483 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
484 } else
485#endif
486 {
487 return -ENODEV;
488 }
489 }
490
491 /*
492 * If we have APM enabled or the CPU clock speed is variable
493 * (CPU stops clock on HLT or slows clock to save power)
494 * then the TSC timestamps may diverge by up to 1 jiffy from
495 * 'real time' but nothing will break.
496 * The most frequent case is that the CPU is "woken" from a halt
497 * state by the timer interrupt itself, so we get 0 error. In the
498 * rare cases where a driver would "wake" the CPU and request a
499 * timestamp, the maximum error is < 1 jiffy. But timestamps are
500 * still perfectly ordered.
501 * Note that the TSC counter will be reset if APM suspends
502 * to disk; this won't break the kernel, though, 'cuz we're
503 * smart. See arch/i386/kernel/apm.c.
504 */
505 /*
506 * Firstly we have to do a CPU check for chips with
507 * a potentially buggy TSC. At this point we haven't run
508 * the ident/bugs checks so we must run this hook as it
509 * may turn off the TSC flag.
510 *
511 * NOTE: this doesn't yet handle SMP 486 machines where only
512 * some CPU's have a TSC. Thats never worked and nobody has
513 * moaned if you have the only one in the world - you fix it!
514 */
515
516 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
517
518 if (cpu_has_tsc) {
519 unsigned long tsc_quotient;
520#ifdef CONFIG_HPET_TIMER
521 if (is_hpet_enabled() && hpet_use_timer) {
522 unsigned long result, remain;
523 printk("Using TSC for gettimeofday\n");
524 tsc_quotient = calibrate_tsc_hpet(NULL);
525 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
526 /*
527 * Math to calculate hpet to usec multiplier
528 * Look for the comments at get_offset_tsc_hpet()
529 */
530 ASM_DIV64_REG(result, remain, hpet_tick,
531 0, KERNEL_TICK_USEC);
532 if (remain > (hpet_tick >> 1))
533 result++; /* rounding the result */
534
535 hpet_usec_quotient = result;
536 } else
537#endif
538 {
539 tsc_quotient = calibrate_tsc();
540 }
541
542 if (tsc_quotient) {
543 fast_gettimeoffset_quotient = tsc_quotient;
544 use_tsc = 1;
545 /*
546 * We could be more selective here I suspect
547 * and just enable this for the next intel chips ?
548 */
549 /* report CPU clock rate in Hz.
550 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
551 * clock/second. Our precision is about 100 ppm.
552 */
553 { unsigned long eax=0, edx=1000;
554 __asm__("divl %2"
555 :"=a" (cpu_khz), "=d" (edx)
556 :"r" (tsc_quotient),
557 "0" (eax), "1" (edx));
558 printk("Detected %u.%03u MHz processor.\n",
559 cpu_khz / 1000, cpu_khz % 1000);
560 }
561 set_cyc2ns_scale(cpu_khz);
562 return 0;
563 }
564 }
565 return -ENODEV;
566}
567
568static int tsc_resume(void)
569{
570 write_seqlock(&monotonic_lock);
571 /* Assume this is the last mark offset time */
572 rdtsc(last_tsc_low, last_tsc_high);
573#ifdef CONFIG_HPET_TIMER
574 if (is_hpet_enabled() && hpet_use_timer)
575 hpet_last = hpet_readl(HPET_COUNTER);
576#endif
577 write_sequnlock(&monotonic_lock);
578 return 0;
579}
580
581#ifndef CONFIG_X86_TSC
582/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
583 * in cpu/common.c */
584static int __init tsc_setup(char *str)
585{
586 tsc_disable = 1;
587 return 1;
588}
589#else
590static int __init tsc_setup(char *str)
591{
592 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
593 "cannot disable TSC.\n");
594 return 1;
595}
596#endif
597__setup("notsc", tsc_setup);
598
599
600
601/************************************************************/
602
603/* tsc timer_opts struct */
604static struct timer_opts timer_tsc = {
605 .name = "tsc",
606 .mark_offset = mark_offset_tsc,
607 .get_offset = get_offset_tsc,
608 .monotonic_clock = monotonic_clock_tsc,
609 .delay = delay_tsc,
610 .read_timer = read_timer_tsc,
611 .resume = tsc_resume,
612};
613
614struct init_timer_opts __initdata timer_tsc_init = {
615 .init = init_tsc,
616 .opts = &timer_tsc,
617};
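
The mull trick in the deleted get_offset_tsc() works because fast_gettimeoffset_quotient is precomputed as 2^32 * (usecs per TSC clock): multiplying a 32-bit TSC delta by it and keeping only the high 32 bits of the 64-bit product yields microseconds with no run-time division. A portable sketch of the same idea; the quotient value is an assumed example for a 1000 MHz TSC, not a value from this patch:

#include <stdint.h>

/* 2^32 / (clocks per usec); for a 1000 MHz TSC: 2^32 / 1000 ~= 4294967 */
static const uint32_t quotient_example = 4294967u;

/* microseconds for a low-32-bit TSC delta: the ">> 32" is what the
 * "mull" in the deleted code achieved by reading %edx */
static uint32_t tsc_delta_to_usecs(uint32_t tsc_delta)
{
	return (uint32_t)(((uint64_t)tsc_delta * quotient_example) >> 32);
}
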
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
new file mode 100644
index 000000000000..7e0d8dab2075
--- /dev/null
+++ b/arch/i386/kernel/tsc.c
@@ -0,0 +1,478 @@
1/*
2 * This code largely moved from arch/i386/kernel/timer/timer_tsc.c
3 * which was originally moved from arch/i386/kernel/time.c.
4 * See comments there for proper credits.
5 */
6
7#include <linux/clocksource.h>
8#include <linux/workqueue.h>
9#include <linux/cpufreq.h>
10#include <linux/jiffies.h>
11#include <linux/init.h>
12#include <linux/dmi.h>
13
14#include <asm/delay.h>
15#include <asm/tsc.h>
16#include <asm/delay.h>
17#include <asm/io.h>
18
19#include "mach_timer.h"
20
21/*
22 * On some systems the TSC frequency does not
23 * change with the cpu frequency. So we need
24 * an extra value to store the TSC freq
25 */
26unsigned int tsc_khz;
27
28int tsc_disable __cpuinitdata = 0;
29
30#ifdef CONFIG_X86_TSC
31static int __init tsc_setup(char *str)
32{
33 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
34 "cannot disable TSC.\n");
35 return 1;
36}
37#else
38/*
39 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
40 * in cpu/common.c
41 */
42static int __init tsc_setup(char *str)
43{
44 tsc_disable = 1;
45
46 return 1;
47}
48#endif
49
50__setup("notsc", tsc_setup);
51
52/*
53 * code to mark and check if the TSC is unstable
54 * due to cpufreq or due to unsynced TSCs
55 */
56static int tsc_unstable;
57
58static inline int check_tsc_unstable(void)
59{
60 return tsc_unstable;
61}
62
63void mark_tsc_unstable(void)
64{
65 tsc_unstable = 1;
66}
67EXPORT_SYMBOL_GPL(mark_tsc_unstable);
68
69/* Accelerators for sched_clock()
70 * convert from cycles(64bits) => nanoseconds (64bits)
71 * basic equation:
72 * ns = cycles / (freq / ns_per_sec)
73 * ns = cycles * (ns_per_sec / freq)
74 * ns = cycles * (10^9 / (cpu_khz * 10^3))
75 * ns = cycles * (10^6 / cpu_khz)
76 *
77 * Then we use scaling math (suggested by george@mvista.com) to get:
78 * ns = cycles * (10^6 * SC / cpu_khz) / SC
79 * ns = cycles * cyc2ns_scale / SC
80 *
81 * And since SC is a constant power of two, we can convert the div
82 * into a shift.
83 *
84 * We can use khz divisor instead of mhz to keep a better precision, since
85 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
86 * (mathieu.desnoyers@polymtl.ca)
87 *
88 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
89 */
90static unsigned long cyc2ns_scale __read_mostly;
91
92#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
93
94static inline void set_cyc2ns_scale(unsigned long cpu_khz)
95{
96 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
97}
98
99static inline unsigned long long cycles_2_ns(unsigned long long cyc)
100{
101 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
102}
103
104/*
105 * Scheduler clock - returns current time in nanosec units.
106 */
107unsigned long long sched_clock(void)
108{
109 unsigned long long this_offset;
110
111 /*
112 * in the NUMA case we don't use the TSC as they are not
113 * synchronized across all CPUs.
114 */
115#ifndef CONFIG_NUMA
116 if (!cpu_khz || check_tsc_unstable())
117#endif
118 /* no locking but a rare wrong value is not a big deal */
119 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
120
121 /* read the Time Stamp Counter: */
122 rdtscll(this_offset);
123
124 /* return the value in ns */
125 return cycles_2_ns(this_offset);
126}
127
128static unsigned long calculate_cpu_khz(void)
129{
130 unsigned long long start, end;
131 unsigned long count;
132 u64 delta64;
133 int i;
134 unsigned long flags;
135
136 local_irq_save(flags);
137
138 /* run 3 times to ensure the cache is warm */
139 for (i = 0; i < 3; i++) {
140 mach_prepare_counter();
141 rdtscll(start);
142 mach_countup(&count);
143 rdtscll(end);
144 }
145 /*
146 * Error: ECTCNEVERSET
147 * The CTC wasn't reliable: we got a hit on the very first read,
148 * or the CPU was so fast/slow that the quotient wouldn't fit in
149 * 32 bits..
150 */
151 if (count <= 1)
152 goto err;
153
154 delta64 = end - start;
155
156 /* cpu freq too fast: */
157 if (delta64 > (1ULL<<32))
158 goto err;
159
160 /* cpu freq too slow: */
161 if (delta64 <= CALIBRATE_TIME_MSEC)
162 goto err;
163
164 delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
165 do_div(delta64,CALIBRATE_TIME_MSEC);
166
167 local_irq_restore(flags);
168 return (unsigned long)delta64;
169err:
170 local_irq_restore(flags);
171 return 0;
172}
173
174int recalibrate_cpu_khz(void)
175{
176#ifndef CONFIG_SMP
177 unsigned long cpu_khz_old = cpu_khz;
178
179 if (cpu_has_tsc) {
180 cpu_khz = calculate_cpu_khz();
181 tsc_khz = cpu_khz;
182 cpu_data[0].loops_per_jiffy =
183 cpufreq_scale(cpu_data[0].loops_per_jiffy,
184 cpu_khz_old, cpu_khz);
185 return 0;
186 } else
187 return -ENODEV;
188#else
189 return -ENODEV;
190#endif
191}
192
193EXPORT_SYMBOL(recalibrate_cpu_khz);
194
195void tsc_init(void)
196{
197 if (!cpu_has_tsc || tsc_disable)
198 return;
199
200 cpu_khz = calculate_cpu_khz();
201 tsc_khz = cpu_khz;
202
203 if (!cpu_khz)
204 return;
205
206 printk("Detected %lu.%03lu MHz processor.\n",
207 (unsigned long)cpu_khz / 1000,
208 (unsigned long)cpu_khz % 1000);
209
210 set_cyc2ns_scale(cpu_khz);
211 use_tsc_delay();
212}
213
214#ifdef CONFIG_CPU_FREQ
215
216static unsigned int cpufreq_delayed_issched = 0;
217static unsigned int cpufreq_init = 0;
218static struct work_struct cpufreq_delayed_get_work;
219
220static void handle_cpufreq_delayed_get(void *v)
221{
222 unsigned int cpu;
223
224 for_each_online_cpu(cpu)
225 cpufreq_get(cpu);
226
227 cpufreq_delayed_issched = 0;
228}
229
230/*
231 * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
232 * to verify the CPU frequency the timing core thinks the CPU is running
233 * at is still correct.
234 */
235static inline void cpufreq_delayed_get(void)
236{
237 if (cpufreq_init && !cpufreq_delayed_issched) {
238 cpufreq_delayed_issched = 1;
239 printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
240 schedule_work(&cpufreq_delayed_get_work);
241 }
242}
243
244/*
245 * if the CPU frequency is scaled, TSC-based delays will need a different
246 * loops_per_jiffy value to function properly.
247 */
248static unsigned int ref_freq = 0;
249static unsigned long loops_per_jiffy_ref = 0;
250static unsigned long cpu_khz_ref = 0;
251
252static int
253time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
254{
255 struct cpufreq_freqs *freq = data;
256
257 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
258 write_seqlock_irq(&xtime_lock);
259
260 if (!ref_freq) {
261 if (!freq->old){
262 ref_freq = freq->new;
263 goto end;
264 }
265 ref_freq = freq->old;
266 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
267 cpu_khz_ref = cpu_khz;
268 }
269
270 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
271 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
272 (val == CPUFREQ_RESUMECHANGE)) {
273 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
274 cpu_data[freq->cpu].loops_per_jiffy =
275 cpufreq_scale(loops_per_jiffy_ref,
276 ref_freq, freq->new);
277
278 if (cpu_khz) {
279
280 if (num_online_cpus() == 1)
281 cpu_khz = cpufreq_scale(cpu_khz_ref,
282 ref_freq, freq->new);
283 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
284 tsc_khz = cpu_khz;
285 set_cyc2ns_scale(cpu_khz);
286 /*
287 * TSC based sched_clock turns
288 * to junk w/ cpufreq
289 */
290 mark_tsc_unstable();
291 }
292 }
293 }
294end:
295 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
296 write_sequnlock_irq(&xtime_lock);
297
298 return 0;
299}
300
301static struct notifier_block time_cpufreq_notifier_block = {
302 .notifier_call = time_cpufreq_notifier
303};
304
305static int __init cpufreq_tsc(void)
306{
307 int ret;
308
309 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
310 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
311 CPUFREQ_TRANSITION_NOTIFIER);
312 if (!ret)
313 cpufreq_init = 1;
314
315 return ret;
316}
317
318core_initcall(cpufreq_tsc);
319
320#endif
321
322/* clock source code */
323
324static unsigned long current_tsc_khz = 0;
325static int tsc_update_callback(void);
326
327static cycle_t read_tsc(void)
328{
329 cycle_t ret;
330
331 rdtscll(ret);
332
333 return ret;
334}
335
336static struct clocksource clocksource_tsc = {
337 .name = "tsc",
338 .rating = 300,
339 .read = read_tsc,
340 .mask = CLOCKSOURCE_MASK(64),
341 .mult = 0, /* to be set */
342 .shift = 22,
343 .update_callback = tsc_update_callback,
344 .is_continuous = 1,
345};
346
347static int tsc_update_callback(void)
348{
349 int change = 0;
350
351 /* check to see if we should switch to the safe clocksource: */
352 if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
353 clocksource_tsc.rating = 50;
354 clocksource_reselect();
355 change = 1;
356 }
357
358 /* only update if tsc_khz has changed: */
359 if (current_tsc_khz != tsc_khz) {
360 current_tsc_khz = tsc_khz;
361 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
362 clocksource_tsc.shift);
363 change = 1;
364 }
365
366 return change;
367}
368
369static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
370{
371 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
372 d->ident);
373 mark_tsc_unstable();
374 return 0;
375}
376
377/* List of systems that have known TSC problems */
378static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
379 {
380 .callback = dmi_mark_tsc_unstable,
381 .ident = "IBM Thinkpad 380XD",
382 .matches = {
383 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
384 DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
385 },
386 },
387 {}
388};
389
390#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
391static struct timer_list verify_tsc_freq_timer;
392
393/* XXX - Probably should add locking */
394static void verify_tsc_freq(unsigned long unused)
395{
396 static u64 last_tsc;
397 static unsigned long last_jiffies;
398
399 u64 now_tsc, interval_tsc;
400 unsigned long now_jiffies, interval_jiffies;
401
402
403 if (check_tsc_unstable())
404 return;
405
406 rdtscll(now_tsc);
407 now_jiffies = jiffies;
408
409 if (!last_jiffies) {
410 goto out;
411 }
412
413 interval_jiffies = now_jiffies - last_jiffies;
414 interval_tsc = now_tsc - last_tsc;
415 interval_tsc *= HZ;
416 do_div(interval_tsc, cpu_khz*1000);
417
418 if (interval_tsc < (interval_jiffies * 3 / 4)) {
419 printk("TSC appears to be running slowly. "
420 "Marking it as unstable\n");
421 mark_tsc_unstable();
422 return;
423 }
424
425out:
426 last_tsc = now_tsc;
427 last_jiffies = now_jiffies;
428 /* set us up to go off on the next interval: */
429 mod_timer(&verify_tsc_freq_timer,
430 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
431}
432
433/*
434 * Make an educated guess if the TSC is trustworthy and synchronized
435 * over all CPUs.
436 */
437static __init int unsynchronized_tsc(void)
438{
439 /*
440 * Intel systems are normally all synchronized.
441 * Exceptions must mark TSC as unstable:
442 */
443 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
444 return 0;
445
446 /* assume multi socket systems are not synchronized: */
447 return num_possible_cpus() > 1;
448}
449
450static int __init init_tsc_clocksource(void)
451{
452
453 if (cpu_has_tsc && tsc_khz && !tsc_disable) {
454 /* check blacklist */
455 dmi_check_system(bad_tsc_dmi_table);
456
457 if (unsynchronized_tsc()) /* mark unstable if unsynced */
458 mark_tsc_unstable();
459 current_tsc_khz = tsc_khz;
460 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
461 clocksource_tsc.shift);
462 /* lower the rating if we already know its unstable: */
463 if (check_tsc_unstable())
464 clocksource_tsc.rating = 50;
465
466 init_timer(&verify_tsc_freq_timer);
467 verify_tsc_freq_timer.function = verify_tsc_freq;
468 verify_tsc_freq_timer.expires =
469 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
470 add_timer(&verify_tsc_freq_timer);
471
472 return clocksource_register(&clocksource_tsc);
473 }
474
475 return 0;
476}
477
478module_init(init_tsc_clocksource);
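
With the new tsc.c there is no get_offset()-style hook any more: the TSC is registered as a clocksource and the generic timekeeping code converts raw cycles to nanoseconds with the mult/shift pair, ns = (cycles * mult) >> shift, where mult comes from clocksource_khz2mult(tsc_khz, 22). A stand-alone sketch of that conversion; khz2mult() below is a local re-derivation (scale 10^6/khz by 2^shift, with rounding), and the 2 GHz figure is an assumed example:

#include <stdint.h>
#include <stdio.h>

static uint32_t khz2mult(uint32_t khz, uint32_t shift)
{
	uint64_t tmp = ((uint64_t)1000000 << shift) + khz / 2;	/* round */

	return (uint32_t)(tmp / khz);
}

static uint64_t cycles_to_ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	uint32_t mult = khz2mult(2000000, 22);	/* assumed 2 GHz TSC */

	/* 2e9 cycles at 2 GHz is one second: prints 1000000000 */
	printf("%llu ns\n", (unsigned long long)cycles_to_ns(2000000000ull, mult, 22));
	return 0;
}

The rating/update_callback machinery above then only has to drop the clocksource to rating 50 and recompute mult when tsc_khz changes or the TSC is marked unstable; the conversion formula itself never changes.
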
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index c49a6acbee56..3c0714c4b669 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -10,43 +10,92 @@
10 * we have to worry about. 10 * we have to worry about.
11 */ 11 */
12 12
13#include <linux/module.h>
13#include <linux/config.h> 14#include <linux/config.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
15#include <linux/delay.h> 16#include <linux/delay.h>
16#include <linux/module.h> 17
17#include <asm/processor.h> 18#include <asm/processor.h>
18#include <asm/delay.h> 19#include <asm/delay.h>
19#include <asm/timer.h> 20#include <asm/timer.h>
20 21
21#ifdef CONFIG_SMP 22#ifdef CONFIG_SMP
22#include <asm/smp.h> 23# include <asm/smp.h>
23#endif 24#endif
24 25
25extern struct timer_opts* timer; 26/* simple loop based delay: */
27static void delay_loop(unsigned long loops)
28{
29 int d0;
30
31 __asm__ __volatile__(
32 "\tjmp 1f\n"
33 ".align 16\n"
34 "1:\tjmp 2f\n"
35 ".align 16\n"
36 "2:\tdecl %0\n\tjns 2b"
37 :"=&a" (d0)
38 :"0" (loops));
39}
40
41/* TSC based delay: */
42static void delay_tsc(unsigned long loops)
43{
44 unsigned long bclock, now;
45
46 rdtscl(bclock);
47 do {
48 rep_nop();
49 rdtscl(now);
50 } while ((now-bclock) < loops);
51}
52
53/*
54 * Since we calibrate only once at boot, this
55 * function should be set once at boot and not changed
56 */
57static void (*delay_fn)(unsigned long) = delay_loop;
58
59void use_tsc_delay(void)
60{
61 delay_fn = delay_tsc;
62}
63
64int read_current_timer(unsigned long *timer_val)
65{
66 if (delay_fn == delay_tsc) {
67 rdtscl(*timer_val);
68 return 0;
69 }
70 return -1;
71}
26 72
27void __delay(unsigned long loops) 73void __delay(unsigned long loops)
28{ 74{
29 cur_timer->delay(loops); 75 delay_fn(loops);
30} 76}
31 77
32inline void __const_udelay(unsigned long xloops) 78inline void __const_udelay(unsigned long xloops)
33{ 79{
34 int d0; 80 int d0;
81
35 xloops *= 4; 82 xloops *= 4;
36 __asm__("mull %0" 83 __asm__("mull %0"
37 :"=d" (xloops), "=&a" (d0) 84 :"=d" (xloops), "=&a" (d0)
38 :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); 85 :"1" (xloops), "0"
39 __delay(++xloops); 86 (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
87
88 __delay(++xloops);
40} 89}
41 90
42void __udelay(unsigned long usecs) 91void __udelay(unsigned long usecs)
43{ 92{
44 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ 93 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
45} 94}
46 95
47void __ndelay(unsigned long nsecs) 96void __ndelay(unsigned long nsecs)
48{ 97{
49 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 98 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
50} 99}
51 100
52EXPORT_SYMBOL(__delay); 101EXPORT_SYMBOL(__delay);
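
The rewritten delay.c keeps the classic __const_udelay() scaling: __udelay() first multiplies usecs by 0x000010c7 (2^32/10^6 rounded up), turning them into a 32-bit fixed-point fraction of a second, and __const_udelay() then multiplies by loops_per_jiffy * HZ/4 (after xloops *= 4) and keeps only the high 32 bits, so loops ~= usecs * loops_per_jiffy * HZ / 10^6 with no division at run time. A stand-alone sketch of the arithmetic; HZ and loops_per_jiffy here are assumed example values:

#include <stdint.h>
#include <stdio.h>

#define HZ 100				/* assumed configuration */

static unsigned long usecs_to_loops(unsigned long usecs, unsigned long lpj)
{
	uint32_t xloops = (uint32_t)(usecs * 0x000010c7u);	/* usecs * 2^32/10^6 */

	xloops *= 4;
	/* the "mull": take only the high 32 bits of the 32x32 product */
	return (unsigned long)(((uint64_t)xloops * (lpj * (HZ / 4))) >> 32) + 1;
}

int main(void)
{
	unsigned long lpj = 5000000;	/* assumed: ~500 delay loops per usec at HZ=100 */

	printf("%lu loops for 10 us\n", usecs_to_loops(10, lpj));	/* ~5000 */
	return 0;
}
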
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index bd6fe96cc16d..6ee7faaf2c1b 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -30,6 +30,40 @@
30 30
31extern void die(const char *,struct pt_regs *,long); 31extern void die(const char *,struct pt_regs *,long);
32 32
33#ifdef CONFIG_KPROBES
34ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
35int register_page_fault_notifier(struct notifier_block *nb)
36{
37 vmalloc_sync_all();
38 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
39}
40
41int unregister_page_fault_notifier(struct notifier_block *nb)
42{
43 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
44}
45
46static inline int notify_page_fault(enum die_val val, const char *str,
47 struct pt_regs *regs, long err, int trap, int sig)
48{
49 struct die_args args = {
50 .regs = regs,
51 .str = str,
52 .err = err,
53 .trapnr = trap,
54 .signr = sig
55 };
56 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
57}
58#else
59static inline int notify_page_fault(enum die_val val, const char *str,
60 struct pt_regs *regs, long err, int trap, int sig)
61{
62 return NOTIFY_DONE;
63}
64#endif
65
66
33/* 67/*
34 * Unlock any spinlocks which will prevent us from getting the 68 * Unlock any spinlocks which will prevent us from getting the
35 * message out 69 * message out
@@ -324,7 +358,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
324 if (unlikely(address >= TASK_SIZE)) { 358 if (unlikely(address >= TASK_SIZE)) {
325 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) 359 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
326 return; 360 return;
327 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 361 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
328 SIGSEGV) == NOTIFY_STOP) 362 SIGSEGV) == NOTIFY_STOP)
329 return; 363 return;
330 /* 364 /*
@@ -334,7 +368,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
334 goto bad_area_nosemaphore; 368 goto bad_area_nosemaphore;
335 } 369 }
336 370
337 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 371 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
338 SIGSEGV) == NOTIFY_STOP) 372 SIGSEGV) == NOTIFY_STOP)
339 return; 373 return;
340 374
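
The notify_page_fault() hunks above (repeated for ia64, powerpc and sparc64 below) move kprobes off the global die notifier chain and onto a dedicated page-fault chain, so ordinary faults no longer pay for unrelated notifier traffic. A client registers a notifier_block whose callback receives the die_args; a minimal sketch of such a client follows (the handler body and names are placeholders, and die_args/DIE_PAGE_FAULT lived in asm/kdebug.h in this era):

#include <linux/notifier.h>
#include <asm/kdebug.h>		/* struct die_args, DIE_PAGE_FAULT */

static int example_pf_handler(struct notifier_block *nb,
			      unsigned long val, void *data)
{
	struct die_args *args = data;

	if (val == DIE_PAGE_FAULT) {
		/* inspect args->regs / args->trapnr here; return NOTIFY_STOP
		 * to claim the fault, as kprobes does for its own probes */
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_pf_nb = {
	.notifier_call = example_pf_handler,
};

/* in module init / exit:
 *	register_page_fault_notifier(&example_pf_nb);
 *	unregister_page_fault_notifier(&example_pf_nb);
 */
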
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index ec0fd3cfa774..fa8a37bcb391 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -281,9 +281,9 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
281 281
282 for (i = 0; i < model->num_counters; ++i) { 282 for (i = 0; i < model->num_counters; ++i) {
283 struct dentry * dir; 283 struct dentry * dir;
284 char buf[2]; 284 char buf[4];
285 285
286 snprintf(buf, 2, "%d", i); 286 snprintf(buf, sizeof(buf), "%d", i);
287 dir = oprofilefs_mkdir(sb, root, buf); 287 dir = oprofilefs_mkdir(sb, root, buf);
288 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 288 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
289 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 289 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
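
This buffer fix recurs in the mips, powerpc and sh oprofile hunks below as well: "%d" of a counter index needs room for every digit plus the terminating NUL, so char buf[2] silently truncated any index of 10 or more (and buf[3] would truncate at 100). Four bytes cover any realistic counter count. A tiny illustration of the truncation, as a stand-alone example:

#include <stdio.h>

int main(void)
{
	char small[2], fixed[4];
	int i = 12;				/* a two-digit counter index */

	snprintf(small, sizeof(small), "%d", i);	/* truncated to "1" */
	snprintf(fixed, sizeof(fixed), "%d", i);	/* "12", as intended */
	printf("%s vs %s\n", small, fixed);
	return 0;
}
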
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index 1eec0868f4b3..ed1512a175ab 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -371,8 +371,7 @@ void __devinit pcibios_sort(void)
371 list_for_each(ln, &pci_devices) { 371 list_for_each(ln, &pci_devices) {
372 d = pci_dev_g(ln); 372 d = pci_dev_g(ln);
373 if (d->bus->number == bus && d->devfn == devfn) { 373 if (d->bus->number == bus && d->devfn == devfn) {
374 list_del(&d->global_list); 374 list_move_tail(&d->global_list, &sorted_devices);
375 list_add_tail(&d->global_list, &sorted_devices);
376 if (d == dev) 375 if (d == dev)
377 found = 1; 376 found = 1;
378 break; 377 break;
@@ -390,8 +389,7 @@ void __devinit pcibios_sort(void)
390 if (!found) { 389 if (!found) {
391 printk(KERN_WARNING "PCI: Device %s not found by BIOS\n", 390 printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
392 pci_name(dev)); 391 pci_name(dev));
393 list_del(&dev->global_list); 392 list_move_tail(&dev->global_list, &sorted_devices);
394 list_add_tail(&dev->global_list, &sorted_devices);
395 } 393 }
396 } 394 }
397 list_splice(&sorted_devices, &pci_devices); 395 list_splice(&sorted_devices, &pci_devices);
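
The two pcbios.c hunks (and the m68k/sun3 ones below) are a mechanical conversion: an open-coded list_del() followed by list_add_tail() or list_add() becomes the list_move_tail()/list_move() helpers from <linux/list.h>. The helper is essentially the two calls fused; roughly:

#include <linux/list.h>

/* rough equivalent of list_move_tail(); the real definition in
 * include/linux/list.h uses the internal __list_del() directly */
static inline void list_move_tail_sketch(struct list_head *entry,
					 struct list_head *head)
{
	list_del(entry);		/* unlink from its current list */
	list_add_tail(entry, head);	/* re-insert at the tail of 'head' */
}
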
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index d98ec49570b8..14ef7cceb208 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -19,6 +19,40 @@
19 19
20extern void die (char *, struct pt_regs *, long); 20extern void die (char *, struct pt_regs *, long);
21 21
22#ifdef CONFIG_KPROBES
23ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
24
25/* Hook to register for page fault notifications */
26int register_page_fault_notifier(struct notifier_block *nb)
27{
28 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
29}
30
31int unregister_page_fault_notifier(struct notifier_block *nb)
32{
33 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
34}
35
36static inline int notify_page_fault(enum die_val val, const char *str,
37 struct pt_regs *regs, long err, int trap, int sig)
38{
39 struct die_args args = {
40 .regs = regs,
41 .str = str,
42 .err = err,
43 .trapnr = trap,
44 .signr = sig
45 };
46 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
47}
48#else
49static inline int notify_page_fault(enum die_val val, const char *str,
50 struct pt_regs *regs, long err, int trap, int sig)
51{
52 return NOTIFY_DONE;
53}
54#endif
55
22/* 56/*
23 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment 57 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
24 * (inside region 5, on ia64) and that page is present. 58 * (inside region 5, on ia64) and that page is present.
@@ -84,7 +118,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
84 /* 118 /*
85 * This is to handle the kprobes on user space access instructions 119 * This is to handle the kprobes on user space access instructions
86 */ 120 */
87 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT, 121 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT,
88 SIGSEGV) == NOTIFY_STOP) 122 SIGSEGV) == NOTIFY_STOP)
89 return; 123 return;
90 124
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index d6d582a5abb0..a226668f20c3 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -94,8 +94,7 @@ pmd_t *get_pointer_table (void)
94 PD_MARKBITS(dp) = mask & ~tmp; 94 PD_MARKBITS(dp) = mask & ~tmp;
95 if (!PD_MARKBITS(dp)) { 95 if (!PD_MARKBITS(dp)) {
96 /* move to end of list */ 96 /* move to end of list */
97 list_del(dp); 97 list_move_tail(dp, &ptable_list);
98 list_add_tail(dp, &ptable_list);
99 } 98 }
100 return (pmd_t *) (page_address(PD_PAGE(dp)) + off); 99 return (pmd_t *) (page_address(PD_PAGE(dp)) + off);
101} 100}
@@ -123,8 +122,7 @@ int free_pointer_table (pmd_t *ptable)
123 * move this descriptor to the front of the list, since 122 * move this descriptor to the front of the list, since
124 * it has one or more free tables. 123 * it has one or more free tables.
125 */ 124 */
126 list_del(dp); 125 list_move(dp, &ptable_list);
127 list_add(dp, &ptable_list);
128 } 126 }
129 return 0; 127 return 0;
130} 128}
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index f04a1d25f1a2..97c7bfde8ae8 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -119,8 +119,7 @@ static inline int refill(void)
119 if(hole->end == prev->start) { 119 if(hole->end == prev->start) {
120 hole->size += prev->size; 120 hole->size += prev->size;
121 hole->end = prev->end; 121 hole->end = prev->end;
122 list_del(&(prev->list)); 122 list_move(&(prev->list), &hole_cache);
123 list_add(&(prev->list), &hole_cache);
124 ret++; 123 ret++;
125 } 124 }
126 125
@@ -182,8 +181,7 @@ static inline unsigned long get_baddr(int len, unsigned long align)
182#endif 181#endif
183 return hole->end; 182 return hole->end;
184 } else if(hole->size == newlen) { 183 } else if(hole->size == newlen) {
185 list_del(&(hole->list)); 184 list_move(&(hole->list), &hole_cache);
186 list_add(&(hole->list), &hole_cache);
187 dvma_entry_use(hole->start) = newlen; 185 dvma_entry_use(hole->start) = newlen;
188#ifdef DVMA_DEBUG 186#ifdef DVMA_DEBUG
189 dvma_allocs++; 187 dvma_allocs++;
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index c31e4cff64e0..65eb55400d77 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -38,7 +38,7 @@ static int op_mips_create_files(struct super_block * sb, struct dentry * root)
38 38
39 for (i = 0; i < model->num_counters; ++i) { 39 for (i = 0; i < model->num_counters; ++i) {
40 struct dentry *dir; 40 struct dentry *dir;
41 char buf[3]; 41 char buf[4];
42 42
43 snprintf(buf, sizeof buf, "%d", i); 43 snprintf(buf, sizeof buf, "%d", i);
44 dir = oprofilefs_mkdir(sb, root, buf); 44 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d20907561f46..7dd5dab789a1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
102u64 tb_to_xs; 102u64 tb_to_xs;
103unsigned tb_to_us; 103unsigned tb_to_us;
104 104
105#define TICKLEN_SCALE (SHIFT_SCALE - 10) 105#define TICKLEN_SCALE TICK_LENGTH_SHIFT
106u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ 106u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */
107u64 ticklen_to_xs; /* 0.64 fraction */ 107u64 ticklen_to_xs; /* 0.64 fraction */
108 108
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index fdbba4206d59..a0a9e1e0061e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -40,6 +40,40 @@
40#include <asm/kdebug.h> 40#include <asm/kdebug.h>
41#include <asm/siginfo.h> 41#include <asm/siginfo.h>
42 42
43#ifdef CONFIG_KPROBES
44ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
45
46/* Hook to register for page fault notifications */
47int register_page_fault_notifier(struct notifier_block *nb)
48{
49 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
50}
51
52int unregister_page_fault_notifier(struct notifier_block *nb)
53{
54 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
55}
56
57static inline int notify_page_fault(enum die_val val, const char *str,
58 struct pt_regs *regs, long err, int trap, int sig)
59{
60 struct die_args args = {
61 .regs = regs,
62 .str = str,
63 .err = err,
64 .trapnr = trap,
65 .signr = sig
66 };
67 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
68}
69#else
70static inline int notify_page_fault(enum die_val val, const char *str,
71 struct pt_regs *regs, long err, int trap, int sig)
72{
73 return NOTIFY_DONE;
74}
75#endif
76
43/* 77/*
44 * Check whether the instruction at regs->nip is a store using 78 * Check whether the instruction at regs->nip is a store using
45 * an update addressing form which will update r1. 79 * an update addressing form which will update r1.
@@ -142,7 +176,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
142 is_write = error_code & ESR_DST; 176 is_write = error_code & ESR_DST;
143#endif /* CONFIG_4xx || CONFIG_BOOKE */ 177#endif /* CONFIG_4xx || CONFIG_BOOKE */
144 178
145 if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, 179 if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs, error_code,
146 11, SIGSEGV) == NOTIFY_STOP) 180 11, SIGSEGV) == NOTIFY_STOP)
147 return 0; 181 return 0;
148 182
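The block added to arch/powerpc/mm/fault.c above moves page-fault notification
off the general die chain and onto a dedicated atomic notifier chain that is
compiled in only under CONFIG_KPROBES; the same pattern appears again for
sparc64 and x86_64 later in this diff. A minimal sketch of how a client such
as kprobes might hook the chain (handler and names are illustrative, not the
actual kprobes code):

	#include <linux/kernel.h>
	#include <linux/notifier.h>
	#include <asm/kdebug.h>		/* struct die_args, DIE_PAGE_FAULT */

	static int example_pf_notify(struct notifier_block *nb,
				     unsigned long val, void *data)
	{
		struct die_args *args = data;

		if (val == DIE_PAGE_FAULT)
			printk(KERN_DEBUG "page fault, error code %ld\n", args->err);
		return NOTIFY_DONE;	/* let the normal fault path continue */
	}

	static struct notifier_block example_pf_nb = {
		.notifier_call = example_pf_notify,
	};

	/* register_page_fault_notifier(&example_pf_nb) at init,
	 * unregister_page_fault_notifier(&example_pf_nb) on teardown. */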
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 27ad56bd227e..fd0bbbe7a4de 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -94,7 +94,7 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
94 94
95 for (i = 0; i < model->num_counters; ++i) { 95 for (i = 0; i < model->num_counters; ++i) {
96 struct dentry *dir; 96 struct dentry *dir;
97 char buf[3]; 97 char buf[4];
98 98
99 snprintf(buf, sizeof buf, "%d", i); 99 snprintf(buf, sizeof buf, "%d", i);
100 dir = oprofilefs_mkdir(sb, root, buf); 100 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/sh/oprofile/op_model_sh7750.c b/arch/sh/oprofile/op_model_sh7750.c
index 5ec9ddcc4b0b..c265185b22a7 100644
--- a/arch/sh/oprofile/op_model_sh7750.c
+++ b/arch/sh/oprofile/op_model_sh7750.c
@@ -198,7 +198,7 @@ static int sh7750_perf_counter_create_files(struct super_block *sb, struct dentr
198 198
199 for (i = 0; i < NR_CNTRS; i++) { 199 for (i = 0; i < NR_CNTRS; i++) {
200 struct dentry *dir; 200 struct dentry *dir;
201 char buf[3]; 201 char buf[4];
202 202
203 snprintf(buf, sizeof(buf), "%d", i); 203 snprintf(buf, sizeof(buf), "%d", i);
204 dir = oprofilefs_mkdir(sb, root, buf); 204 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 6e002aacb961..1605967cce91 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -31,6 +31,40 @@
31#include <asm/kdebug.h> 31#include <asm/kdebug.h>
32#include <asm/mmu_context.h> 32#include <asm/mmu_context.h>
33 33
34#ifdef CONFIG_KPROBES
35ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
36
37/* Hook to register for page fault notifications */
38int register_page_fault_notifier(struct notifier_block *nb)
39{
40 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
41}
42
43int unregister_page_fault_notifier(struct notifier_block *nb)
44{
45 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
46}
47
48static inline int notify_page_fault(enum die_val val, const char *str,
49 struct pt_regs *regs, long err, int trap, int sig)
50{
51 struct die_args args = {
52 .regs = regs,
53 .str = str,
54 .err = err,
55 .trapnr = trap,
56 .signr = sig
57 };
58 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
59}
60#else
61static inline int notify_page_fault(enum die_val val, const char *str,
62 struct pt_regs *regs, long err, int trap, int sig)
63{
64 return NOTIFY_DONE;
65}
66#endif
67
34/* 68/*
35 * To debug kernel to catch accesses to certain virtual/physical addresses. 69 * To debug kernel to catch accesses to certain virtual/physical addresses.
36 * Mode = 0 selects physical watchpoints, mode = 1 selects virtual watchpoints. 70 * Mode = 0 selects physical watchpoints, mode = 1 selects virtual watchpoints.
@@ -263,7 +297,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
263 297
264 fault_code = get_thread_fault_code(); 298 fault_code = get_thread_fault_code();
265 299
266 if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, 300 if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs,
267 fault_code, 0, SIGSEGV) == NOTIFY_STOP) 301 fault_code, 0, SIGSEGV) == NOTIFY_STOP)
268 return; 302 return;
269 303
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
index 32327bb37aff..2aa565c136e5 100644
--- a/arch/x86_64/boot/video.S
+++ b/arch/x86_64/boot/video.S
@@ -1929,6 +1929,7 @@ skip10: movb %ah, %al
1929 ret 1929 ret
1930 1930
1931store_edid: 1931store_edid:
1932#ifdef CONFIG_FIRMWARE_EDID
1932 pushw %es # just save all registers 1933 pushw %es # just save all registers
1933 pushw %ax 1934 pushw %ax
1934 pushw %bx 1935 pushw %bx
@@ -1946,6 +1947,22 @@ store_edid:
1946 rep 1947 rep
1947 stosl 1948 stosl
1948 1949
1950 pushw %es # save ES
1951 xorw %di, %di # Report Capability
1952 pushw %di
1953 popw %es # ES:DI must be 0:0
1954 movw $0x4f15, %ax
1955 xorw %bx, %bx
1956 xorw %cx, %cx
1957 int $0x10
1958 popw %es # restore ES
1959
1960 cmpb $0x00, %ah # call successful
1961 jne no_edid
1962
1963 cmpb $0x4f, %al # function supported
1964 jne no_edid
1965
1949 movw $0x4f15, %ax # do VBE/DDC 1966 movw $0x4f15, %ax # do VBE/DDC
1950 movw $0x01, %bx 1967 movw $0x01, %bx
1951 movw $0x00, %cx 1968 movw $0x00, %cx
@@ -1953,12 +1970,14 @@ store_edid:
1953 movw $0x140, %di 1970 movw $0x140, %di
1954 int $0x10 1971 int $0x10
1955 1972
1973no_edid:
1956 popw %di # restore all registers 1974 popw %di # restore all registers
1957 popw %dx 1975 popw %dx
1958 popw %cx 1976 popw %cx
1959 popw %bx 1977 popw %bx
1960 popw %ax 1978 popw %ax
1961 popw %es 1979 popw %es
1980#endif
1962 ret 1981 ret
1963 1982
1964# VIDEO_SELECT-only variables 1983# VIDEO_SELECT-only variables
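The code added to store_edid first issues VBE/DDC subfunction 0 ("report DDC
capability", AX=0x4F15, BX=0) with ES:DI cleared, and only proceeds to the
EDID read (BX=1, buffer at 0:0x140) if the BIOS reports success (AH=0) and
function support (AL=0x4F); otherwise it jumps to no_edid, and the whole
routine is now compiled only under CONFIG_FIRMWARE_EDID. The same control flow
expressed as a C sketch around a hypothetical int10() wrapper for the
real-mode BIOS call:

	struct bios_regs { unsigned short ax, bx, cx, es, di; };
	void int10(struct bios_regs *r);	/* hypothetical int 0x10 wrapper */

	void store_edid(void)
	{
		struct bios_regs r = { .ax = 0x4f15, .bx = 0, .cx = 0, .es = 0, .di = 0 };

		int10(&r);			/* subfunction 0: report capability */
		if ((r.ax >> 8) != 0x00 || (r.ax & 0xff) != 0x4f)
			return;			/* call failed or DDC unsupported */

		r = (struct bios_regs){ .ax = 0x4f15, .bx = 1, .cx = 0,
					.es = 0, .di = 0x140 };
		int10(&r);			/* subfunction 1: read EDID block */
	}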
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
index bf421ed26808..7554458dc9cb 100644
--- a/arch/x86_64/kernel/pmtimer.c
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -27,7 +27,7 @@
27/* The I/O port the PMTMR resides at. 27/* The I/O port the PMTMR resides at.
28 * The location is detected during setup_arch(), 28 * The location is detected during setup_arch(),
29 * in arch/i386/kernel/acpi/boot.c */ 29 * in arch/i386/kernel/acpi/boot.c */
30u32 pmtmr_ioport; 30u32 pmtmr_ioport __read_mostly;
31 31
32/* value of the Power timer at last timer interrupt */ 32/* value of the Power timer at last timer interrupt */
33static u32 offset_delay; 33static u32 offset_delay;
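Marking pmtmr_ioport __read_mostly groups it with other write-once,
read-often variables in a dedicated data section, so the hot timekeeping
reads do not share cache lines with frequently written data. The annotation
itself is a single qualifier; a sketch with a hypothetical variable:

	#include <linux/cache.h>	/* __read_mostly */
	#include <linux/types.h>

	/* Set once during setup, then only read in the fast path. */
	static u32 example_ioport __read_mostly;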
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index fb850b52b4da..143c65031539 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -109,6 +109,7 @@ struct sys_desc_table_struct {
109}; 109};
110 110
111struct edid_info edid_info; 111struct edid_info edid_info;
112EXPORT_SYMBOL_GPL(edid_info);
112struct e820map e820; 113struct e820map e820;
113 114
114extern int root_mountflags; 115extern int root_mountflags;
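Exporting edid_info with EXPORT_SYMBOL_GPL makes the EDID block captured by
the boot code available to GPL modules such as framebuffer drivers. A minimal
sketch of a module consuming it (the header location is an assumption, not
taken from this diff):

	#include <linux/module.h>
	#include <linux/init.h>
	#include <video/edid.h>		/* assumed home of struct edid_info */

	extern struct edid_info edid_info;

	static int __init edid_consumer_init(void)
	{
		printk(KERN_INFO "boot EDID copy at %p\n", &edid_info);
		return 0;
	}
	module_init(edid_consumer_init);
	MODULE_LICENSE("GPL");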
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 55250593d8c9..0803d3858af1 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -41,6 +41,41 @@
41#define PF_RSVD (1<<3) 41#define PF_RSVD (1<<3)
42#define PF_INSTR (1<<4) 42#define PF_INSTR (1<<4)
43 43
44#ifdef CONFIG_KPROBES
45ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
46
47/* Hook to register for page fault notifications */
48int register_page_fault_notifier(struct notifier_block *nb)
49{
50 vmalloc_sync_all();
51 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
52}
53
54int unregister_page_fault_notifier(struct notifier_block *nb)
55{
56 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
57}
58
59static inline int notify_page_fault(enum die_val val, const char *str,
60 struct pt_regs *regs, long err, int trap, int sig)
61{
62 struct die_args args = {
63 .regs = regs,
64 .str = str,
65 .err = err,
66 .trapnr = trap,
67 .signr = sig
68 };
69 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
70}
71#else
72static inline int notify_page_fault(enum die_val val, const char *str,
73 struct pt_regs *regs, long err, int trap, int sig)
74{
75 return NOTIFY_DONE;
76}
77#endif
78
44void bust_spinlocks(int yes) 79void bust_spinlocks(int yes)
45{ 80{
46 int loglevel_save = console_loglevel; 81 int loglevel_save = console_loglevel;
@@ -348,7 +383,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
348 if (vmalloc_fault(address) >= 0) 383 if (vmalloc_fault(address) >= 0)
349 return; 384 return;
350 } 385 }
351 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 386 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
352 SIGSEGV) == NOTIFY_STOP) 387 SIGSEGV) == NOTIFY_STOP)
353 return; 388 return;
354 /* 389 /*
@@ -358,7 +393,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
358 goto bad_area_nosemaphore; 393 goto bad_area_nosemaphore;
359 } 394 }
360 395
361 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 396 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
362 SIGSEGV) == NOTIFY_STOP) 397 SIGSEGV) == NOTIFY_STOP)
363 return; 398 return;
364 399