Diffstat (limited to 'arch/i386/kernel')
 arch/i386/kernel/Makefile                |   2
 arch/i386/kernel/acpi/boot.c             |   3
 arch/i386/kernel/cpu/common.c            |  16
 arch/i386/kernel/cpu/cpufreq/longhaul.c  |  12
 arch/i386/kernel/cpu/cyrix.c             |   6
 arch/i386/kernel/cpu/intel.c             |   9
 arch/i386/kernel/cpu/intel_cacheinfo.c   |   9
 arch/i386/kernel/cpu/mtrr/main.c         |   2
 arch/i386/kernel/crash.c                 |   2
 arch/i386/kernel/dmi_scan.c              | 231
 arch/i386/kernel/doublefault.c           |   2
 arch/i386/kernel/efi.c                   | 110
 arch/i386/kernel/entry.S                 |  22
 arch/i386/kernel/head.S                  |  48
 arch/i386/kernel/i8237.c                 |  67
 arch/i386/kernel/io_apic.c               |  65
 arch/i386/kernel/ioport.c                |   7
 arch/i386/kernel/kprobes.c               |  35
 arch/i386/kernel/ldt.c                   |   7
 arch/i386/kernel/machine_kexec.c         |  18
 arch/i386/kernel/microcode.c             |   7
 arch/i386/kernel/mpparse.c               |  17
 arch/i386/kernel/msr.c                   |  31
 arch/i386/kernel/nmi.c                   |  10
 arch/i386/kernel/process.c               |  43
 arch/i386/kernel/ptrace.c                |  63
 arch/i386/kernel/reboot.c                |   9
 arch/i386/kernel/semaphore.c             | 162
 arch/i386/kernel/setup.c                 |  24
 arch/i386/kernel/signal.c                |   8
 arch/i386/kernel/smp.c                   |   2
 arch/i386/kernel/smpboot.c               |   7
 arch/i386/kernel/time.c                  |  23
 arch/i386/kernel/timers/timer_hpet.c     |  23
 arch/i386/kernel/timers/timer_pit.c      |  27
 arch/i386/kernel/timers/timer_pm.c       |   9
 arch/i386/kernel/timers/timer_tsc.c      |  14
 arch/i386/kernel/traps.c                 |  27
 arch/i386/kernel/vm86.c                  |  10
 arch/i386/kernel/vmlinux.lds.S           |   1
 arch/i386/kernel/vsyscall-sigreturn.S    |   3
 41 files changed, 630 insertions(+), 563 deletions(-)
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index c52b4fad011b..f10de0f2c5e6 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds
 obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		doublefault.o quirks.o
+		doublefault.o quirks.o i8237.o
 
 obj-y				+= cpu/
 obj-y				+= timers/
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 0fb23c30eb98..a63351c085c6 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -814,6 +814,9 @@ static void __init acpi_process_madt(void)
 	if (!error) {
 		acpi_lapic = 1;
 
+#ifdef CONFIG_X86_GENERICARCH
+		generic_bigsmp_probe();
+#endif
 		/*
 		 * Parse MADT IO-APIC entries
 		 */
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 4553ffd94b1f..46ce9b248f55 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -613,8 +613,8 @@ void __devinit cpu_init(void)
 	memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
 	       GDT_ENTRY_TLS_ENTRIES * 8);
 
-	__asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu]));
-	__asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+	load_gdt(&cpu_gdt_descr[cpu]);
+	load_idt(&idt_descr);
 
 	/*
 	 * Delete NT
@@ -642,12 +642,12 @@ void __devinit cpu_init(void)
 	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
 
 	/* Clear all 6 debug registers: */
-
-#define CD(register) set_debugreg(0, register)
-
-	CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+	set_debugreg(0, 0);
+	set_debugreg(0, 1);
+	set_debugreg(0, 2);
+	set_debugreg(0, 3);
+	set_debugreg(0, 6);
+	set_debugreg(0, 7);
 
 	/*
 	 * Force FPU initialization:
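
The load_gdt()/load_idt() and set_debugreg() calls are thin wrappers over the same instructions the removed inline asm issued. A minimal sketch of how these accessors are defined, assuming the include/asm-i386/system.h and processor.h definitions of this era (details may differ):

	/* sketch: descriptor-table loads take a pointer to the descriptor */
	#define load_gdt(dtr)	__asm__ __volatile__("lgdt %0" : : "m" (*(dtr)))
	#define load_idt(dtr)	__asm__ __volatile__("lidt %0" : : "m" (*(dtr)))

	/* sketch: the register number is pasted into the %dbN operand */
	#define set_debugreg(value, register)	\
		__asm__("movl %0,%%db" #register : : "r" (value))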
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 04e3563da4fe..bf02b5026e62 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -64,8 +64,6 @@ static int dont_scale_voltage;
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
 
-#define __hlt()	__asm__ __volatile__("hlt": : :"memory")
-
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
@@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul,
 	outb(0xFE,0x21);	/* TMR0 only */
 	outb(0xFF,0x80);	/* delay */
 
-	local_irq_enable();
-
-	__hlt();
+	safe_halt();
 	wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
-	__hlt();
+	halt();
 
 	local_irq_disable();
 
@@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	bcr2.bits.CLOCKMUL = clock_ratio_index;
 	local_irq_disable();
 	wrmsrl (MSR_VIA_BCR2, bcr2.val);
-	local_irq_enable();
-
-	__hlt();
+	safe_halt();
 
 	/* Disable software clock multiplier */
 	rdmsrl (MSR_VIA_BCR2, bcr2.val);
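
safe_halt() and halt() subsume the driver-local __hlt() macro plus the separate local_irq_enable(); the sti;hlt pair must be back to back so no interrupt can slip in between enabling interrupts and halting. A sketch of the two helpers, assuming the include/asm-i386/system.h definitions (not part of this patch):

	#define safe_halt()	__asm__ __volatile__("sti; hlt" : : : "memory")
	#define halt()		__asm__ __volatile__("hlt" : : : "memory")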
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index ba4b01138c8f..ff87cc22b323 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void)
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
 	/* set 'Not Write-through' */
 	cr0 = 0x20000000;
-	__asm__("movl %%cr0,%%eax\n\t"
-		"orl %0,%%eax\n\t"
-		"movl %%eax,%%cr0\n"
-		: : "r" (cr0)
-		:"ax");
+	write_cr0(read_cr0() | cr0);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
 }
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index a2c33c1a46c5..43601de0f633 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -82,16 +82,13 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
  */
 static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
 {
-	unsigned int eax;
+	unsigned int eax, ebx, ecx, edx;
 
 	if (c->cpuid_level < 4)
 		return 1;
 
-	__asm__("cpuid"
-		: "=a" (eax)
-		: "0" (4), "c" (0)
-		: "bx", "dx");
-
+	/* Intel has a non-standard dependency on %ecx for this CPUID level. */
+	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
 	if (eax & 0x1f)
 		return ((eax >> 26) + 1);
 	else
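
cpuid_count() issues the same instruction but makes the %ecx sub-leaf an explicit argument and returns all four output registers, so no clobber list can silently go stale. A sketch of the helper, roughly as include/asm-i386/processor.h defines it:

	static inline void cpuid_count(int op, int count, int *eax, int *ebx,
				       int *ecx, int *edx)
	{
		__asm__("cpuid"
			: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
			: "0" (op), "c" (count));
	}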
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 6c55b50cf048..9e0d5f83cb9f 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -305,6 +305,9 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 {
 	struct _cpuid4_info	*this_leaf;
 	unsigned long num_threads_sharing;
+#ifdef CONFIG_X86_HT
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+#endif
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -314,10 +317,12 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 #ifdef CONFIG_X86_HT
 	else if (num_threads_sharing == smp_num_siblings)
 		this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
-#endif
+	else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
+		this_leaf->shared_cpu_map = cpu_core_map[cpu];
 	else
-		printk(KERN_INFO "Number of CPUs sharing cache didn't match "
+		printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
 				"any known set of CPUs\n");
+#endif
 }
 #else
 static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 764cac64e211..dd4ebd6af7e4 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -561,7 +561,7 @@ struct mtrr_value {
 
 static struct mtrr_value * mtrr_state;
 
-static int mtrr_save(struct sys_device * sysdev, u32 state)
+static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
 	int i;
 	int size = num_var_ranges * sizeof(struct mtrr_value);
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index e5fab12f7926..913be77bb844 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -153,7 +153,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
 	disable_local_APIC();
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
-	__asm__("hlt");
+	halt();
 	for(;;);
 
 	return 1;
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index a3cdf894302b..58516e2ac172 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -6,32 +6,28 @@
 #include <linux/bootmem.h>
 
 
-struct dmi_header {
-	u8 type;
-	u8 length;
-	u16 handle;
-};
-
-#undef DMI_DEBUG
-
-#ifdef DMI_DEBUG
-#define dmi_printk(x) printk x
-#else
-#define dmi_printk(x)
-#endif
-
 static char * __init dmi_string(struct dmi_header *dm, u8 s)
 {
 	u8 *bp = ((u8 *) dm) + dm->length;
+	char *str = "";
 
-	if (!s)
-		return "";
-	s--;
-	while (s > 0 && *bp) {
-		bp += strlen(bp) + 1;
-		s--;
-	}
-	return bp;
+	if (s) {
+		s--;
+		while (s > 0 && *bp) {
+			bp += strlen(bp) + 1;
+			s--;
+		}
+
+		if (*bp != 0) {
+			str = alloc_bootmem(strlen(bp) + 1);
+			if (str != NULL)
+				strcpy(str, bp);
+			else
+				printk(KERN_ERR "dmi_string: out of memory.\n");
+		}
+	}
+
+	return str;
 }
 
 /*
@@ -84,69 +80,76 @@ static int __init dmi_checksum(u8 *buf)
 	return sum == 0;
 }
 
-static int __init dmi_iterate(void (*decode)(struct dmi_header *))
-{
-	u8 buf[15];
-	char __iomem *p, *q;
-
-	/*
-	 * no iounmap() for that ioremap(); it would be a no-op, but it's
-	 * so early in setup that sucker gets confused into doing what
-	 * it shouldn't if we actually call it.
-	 */
-	p = ioremap(0xF0000, 0x10000);
-	if (p == NULL)
-		return -1;
-
-	for (q = p; q < p + 0x10000; q += 16) {
-		memcpy_fromio(buf, q, 15);
-		if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
-			u16 num = (buf[13] << 8) | buf[12];
-			u16 len = (buf[7] << 8) | buf[6];
-			u32 base = (buf[11] << 24) | (buf[10] << 16) |
-				   (buf[9] << 8) | buf[8];
-
-			/*
-			 * DMI version 0.0 means that the real version is taken from
-			 * the SMBIOS version, which we don't know at this point.
-			 */
-			if (buf[14] != 0)
-				printk(KERN_INFO "DMI %d.%d present.\n",
-					buf[14] >> 4, buf[14] & 0xF);
-			else
-				printk(KERN_INFO "DMI present.\n");
-
-			dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
-				num, len));
-			dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
-
-			if (dmi_table(base,len, num, decode) == 0)
-				return 0;
-		}
-	}
-	return -1;
-}
-
-static char *dmi_ident[DMI_STRING_MAX];
-
+static char *dmi_ident[DMI_STRING_MAX];
+static LIST_HEAD(dmi_devices);
+
 /*
  * Save a DMI string
  */
 static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
 {
-	char *d = (char*)dm;
-	char *p = dmi_string(dm, d[string]);
-
-	if (p == NULL || *p == 0)
-		return;
-	if (dmi_ident[slot])
-		return;
-
-	dmi_ident[slot] = alloc_bootmem(strlen(p) + 1);
-	if(dmi_ident[slot])
-		strcpy(dmi_ident[slot], p);
-	else
-		printk(KERN_ERR "dmi_save_ident: out of memory.\n");
+	char *p, *d = (char*) dm;
+
+	if (dmi_ident[slot])
+		return;
+
+	p = dmi_string(dm, d[string]);
+	if (p == NULL)
+		return;
+
+	dmi_ident[slot] = p;
 }
+
+static void __init dmi_save_devices(struct dmi_header *dm)
+{
+	int i, count = (dm->length - sizeof(struct dmi_header)) / 2;
+	struct dmi_device *dev;
+
+	for (i = 0; i < count; i++) {
+		char *d = ((char *) dm) + (i * 2);
+
+		/* Skip disabled device */
+		if ((*d & 0x80) == 0)
+			continue;
+
+		dev = alloc_bootmem(sizeof(*dev));
+		if (!dev) {
+			printk(KERN_ERR "dmi_save_devices: out of memory.\n");
+			break;
+		}
+
+		dev->type = *d++ & 0x7f;
+		dev->name = dmi_string(dm, *d);
+		dev->device_data = NULL;
+
+		list_add(&dev->list, &dmi_devices);
+	}
+}
+
+static void __init dmi_save_ipmi_device(struct dmi_header *dm)
+{
+	struct dmi_device *dev;
+	void * data;
+
+	data = alloc_bootmem(dm->length);
+	if (data == NULL) {
+		printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
+		return;
+	}
+
+	memcpy(data, dm, dm->length);
+
+	dev = alloc_bootmem(sizeof(*dev));
+	if (!dev) {
+		printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
+		return;
+	}
+
+	dev->type = DMI_DEV_TYPE_IPMI;
+	dev->name = "IPMI controller";
+	dev->device_data = data;
+
+	list_add(&dev->list, &dmi_devices);
+}
 
 /*
@@ -156,42 +159,69 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
  */
 static void __init dmi_decode(struct dmi_header *dm)
 {
-	u8 *data __attribute__((__unused__)) = (u8 *)dm;
-
 	switch(dm->type) {
-	case 0:
-		dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4])));
+	case 0:		/* BIOS Information */
 		dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
-		dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5])));
 		dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
-		dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8])));
 		dmi_save_ident(dm, DMI_BIOS_DATE, 8);
 		break;
-	case 1:
-		dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4])));
+	case 1:		/* System Information */
 		dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
-		dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5])));
 		dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
-		dmi_printk(("Version: %s\n", dmi_string(dm, data[6])));
 		dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
-		dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7])));
 		dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
 		break;
-	case 2:
-		dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4])));
+	case 2:		/* Base Board Information */
 		dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
-		dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5])));
 		dmi_save_ident(dm, DMI_BOARD_NAME, 5);
-		dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6])));
 		dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
 		break;
+	case 10:	/* Onboard Devices Information */
+		dmi_save_devices(dm);
+		break;
+	case 38:	/* IPMI Device Information */
+		dmi_save_ipmi_device(dm);
 	}
 }
 
 void __init dmi_scan_machine(void)
 {
-	if (dmi_iterate(dmi_decode))
-		printk(KERN_INFO "DMI not present.\n");
+	u8 buf[15];
+	char __iomem *p, *q;
+
+	/*
+	 * no iounmap() for that ioremap(); it would be a no-op, but it's
+	 * so early in setup that sucker gets confused into doing what
+	 * it shouldn't if we actually call it.
+	 */
+	p = ioremap(0xF0000, 0x10000);
+	if (p == NULL)
+		goto out;
+
+	for (q = p; q < p + 0x10000; q += 16) {
+		memcpy_fromio(buf, q, 15);
+		if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+			u16 num = (buf[13] << 8) | buf[12];
+			u16 len = (buf[7] << 8) | buf[6];
+			u32 base = (buf[11] << 24) | (buf[10] << 16) |
+				   (buf[9] << 8) | buf[8];
+
+			/*
+			 * DMI version 0.0 means that the real version is taken from
+			 * the SMBIOS version, which we don't know at this point.
+			 */
+			if (buf[14] != 0)
+				printk(KERN_INFO "DMI %d.%d present.\n",
+					buf[14] >> 4, buf[14] & 0xF);
+			else
+				printk(KERN_INFO "DMI present.\n");
+
+			if (dmi_table(base,len, num, dmi_decode) == 0)
+				return;
+		}
+	}
+
+out:	printk(KERN_INFO "DMI not present.\n");
 }
 
 
@@ -218,9 +248,9 @@ int dmi_check_system(struct dmi_system_id *list)
 			/* No match */
 			goto fail;
 		}
+		count++;
 		if (d->callback && d->callback(d))
 			break;
-		count++;
 fail:		d++;
 	}
 
@@ -240,3 +270,32 @@ char *dmi_get_system_info(int field)
 	return dmi_ident[field];
 }
 EXPORT_SYMBOL(dmi_get_system_info);
+
+/**
+ *	dmi_find_device - find onboard device by type/name
+ *	@type: device type or %DMI_DEV_TYPE_ANY to match all device types
+ *	@desc: device name string or %NULL to match all
+ *	@from: previous device found in search, or %NULL for new search.
+ *
+ *	Iterates through the list of known onboard devices. If a device is
+ *	found with a matching @vendor and @device, a pointer to its device
+ *	structure is returned.  Otherwise, %NULL is returned.
+ *	A new search is initiated by passing %NULL to the @from argument.
+ *	If @from is not %NULL, searches continue from next device.
+ */
+struct dmi_device * dmi_find_device(int type, const char *name,
+				    struct dmi_device *from)
+{
+	struct list_head *d, *head = from ? &from->list : &dmi_devices;
+
+	for(d = head->next; d != &dmi_devices; d = d->next) {
+		struct dmi_device *dev = list_entry(d, struct dmi_device, list);
+
+		if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) &&
+		    ((name == NULL) || (strcmp(dev->name, name) == 0)))
+			return dev;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(dmi_find_device);
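
A typical caller starts the scan with @from = NULL and feeds each result back in to continue it; here handle_ipmi() is purely illustrative, not part of this patch:

	struct dmi_device *dev = NULL;

	/* visit every IPMI device record saved from the DMI table */
	while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev)) != NULL)
		handle_ipmi(dev->device_data);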
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 789af3e9fb1f..5edb1d379add 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -20,7 +20,7 @@ static void doublefault_fn(void)
 	struct Xgt_desc_struct gdt_desc = {0, 0};
 	unsigned long gdt, tss;
 
-	__asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
+	store_gdt(&gdt_desc);
 	gdt = gdt_desc.address;
 
 	printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 385883ea8c19..ecad519fd395 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void)
 	 * directory. If I have PSE, I just need to duplicate one entry in
 	 * page directory.
 	 */
-	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+	cr4 = read_cr4();
 
 	if (cr4 & X86_CR4_PSE) {
 		efi_bak_pg_dir_pointer[0].pgd =
@@ -104,8 +104,7 @@ static void efi_call_phys_prelog(void)
 	local_flush_tlb();
 
 	cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
-	__asm__ __volatile__("lgdt %0":"=m"
-			(*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
+	load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]));
 }
 
 static void efi_call_phys_epilog(void)
@@ -114,8 +113,8 @@ static void efi_call_phys_epilog(void)
 
 	cpu_gdt_descr[0].address =
 		(unsigned long) __va(cpu_gdt_descr[0].address);
-	__asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
-	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+	load_gdt(&cpu_gdt_descr[0]);
+	cr4 = read_cr4();
 
 	if (cr4 & X86_CR4_PSE) {
 		swapper_pg_dir[pgd_index(0)].pgd =
@@ -233,22 +232,23 @@ void __init efi_map_memmap(void)
 {
 	memmap.map = NULL;
 
-	memmap.map = (efi_memory_desc_t *)
-		bt_ioremap((unsigned long) memmap.phys_map,
-			   (memmap.nr_map * sizeof(efi_memory_desc_t)));
-
+	memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
+				(memmap.nr_map * memmap.desc_size));
 	if (memmap.map == NULL)
 		printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
+
+	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
 }
 
 #if EFI_DEBUG
 static void __init print_efi_memmap(void)
 {
 	efi_memory_desc_t *md;
+	void *p;
 	int i;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
+		md = p;
 		printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
 			"range=[0x%016llx-0x%016llx) (%lluMB)\n",
 			i, md->type, md->attribute, md->phys_addr,
@@ -271,10 +271,10 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
 	} prev, curr;
 	efi_memory_desc_t *md;
 	unsigned long start, end;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 
 		if ((md->num_pages == 0) || (!is_available_memory(md)))
 			continue;
@@ -325,6 +325,7 @@ void __init efi_init(void)
 	memmap.phys_map = EFI_MEMMAP;
 	memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
 	memmap.desc_version = EFI_MEMDESC_VERSION;
+	memmap.desc_size = EFI_MEMDESC_SIZE;
 
 	efi.systab = (efi_system_table_t *)
 		boot_ioremap((unsigned long) efi_phys.systab,
@@ -428,22 +429,30 @@ void __init efi_init(void)
 		printk(KERN_ERR PFX "Could not map the runtime service table!\n");
 
 	/* Map the EFI memory map for use until paging_init() */
-
-	memmap.map = (efi_memory_desc_t *)
-		boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
-
+	memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
 	if (memmap.map == NULL)
 		printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
 
-	if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) {
-		printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't "
-		       "match the one from EFI!\n");
-	}
+	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
 #if EFI_DEBUG
 	print_efi_memmap();
 #endif
 }
 
+static inline void __init check_range_for_systab(efi_memory_desc_t *md)
+{
+	if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
+	    ((unsigned long)efi_phys.systab < md->phys_addr +
+	     ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
+		unsigned long addr;
+
+		addr = md->virt_addr - md->phys_addr +
+			(unsigned long)efi_phys.systab;
+		efi.systab = (efi_system_table_t *)addr;
+	}
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
@@ -457,43 +466,32 @@ void __init efi_enter_virtual_mode(void)
 {
 	efi_memory_desc_t *md;
 	efi_status_t status;
-	int i;
+	void *p;
 
 	efi.systab = NULL;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 
-		if (md->attribute & EFI_MEMORY_RUNTIME) {
-			md->virt_addr =
-				(unsigned long)ioremap(md->phys_addr,
-					md->num_pages << EFI_PAGE_SHIFT);
-			if (!(unsigned long)md->virt_addr) {
-				printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
-					(unsigned long)md->phys_addr);
-			}
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
 
-			if (((unsigned long)md->phys_addr <=
-			     (unsigned long)efi_phys.systab) &&
-			    ((unsigned long)efi_phys.systab <
-			     md->phys_addr +
-			     ((unsigned long)md->num_pages <<
-			      EFI_PAGE_SHIFT))) {
-				unsigned long addr;
-
-				addr = md->virt_addr - md->phys_addr +
-					(unsigned long)efi_phys.systab;
-				efi.systab = (efi_system_table_t *)addr;
-			}
+		md->virt_addr = (unsigned long)ioremap(md->phys_addr,
+			md->num_pages << EFI_PAGE_SHIFT);
+		if (!(unsigned long)md->virt_addr) {
+			printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
+				(unsigned long)md->phys_addr);
 		}
+		/* update the virtual address of the EFI system table */
+		check_range_for_systab(md);
 	}
 
 	if (!efi.systab)
 		BUG();
 
 	status = phys_efi_set_virtual_address_map(
-		sizeof(efi_memory_desc_t) * memmap.nr_map,
-		sizeof(efi_memory_desc_t),
+		memmap.desc_size * memmap.nr_map,
+		memmap.desc_size,
 		memmap.desc_version,
 		memmap.phys_map);
 
@@ -533,10 +531,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 {
 	struct resource *res;
 	efi_memory_desc_t *md;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 
 		if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
 		    0x100000000ULL)
@@ -613,10 +611,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 u32 efi_mem_type(unsigned long phys_addr)
 {
 	efi_memory_desc_t *md;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 		if ((md->phys_addr <= phys_addr) && (phys_addr <
 		    (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
 			return md->type;
@@ -627,10 +625,10 @@ u32 efi_mem_type(unsigned long phys_addr)
 u64 efi_mem_attributes(unsigned long phys_addr)
 {
 	efi_memory_desc_t *md;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 		if ((md->phys_addr <= phys_addr) && (phys_addr <
 		    (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
 			return md->attribute;
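
Every loop above moves from array indexing to byte-stride iteration because the firmware reports its own descriptor size, which may be larger than the kernel's efi_memory_desc_t; indexing by the C type would drift off the real entries. The idiom, in outline:

	void *p;
	efi_memory_desc_t *md;

	/* step by the firmware-reported stride, not sizeof(*md) */
	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
		md = p;
		/* ... examine md->type, md->phys_addr, ... */
	}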
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index a991d4e5edd2..3aad03839660 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -203,7 +203,7 @@ sysenter_past_esp:
 	GET_THREAD_INFO(%ebp)
 
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -226,9 +226,9 @@ ENTRY(system_call)
 	pushl %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
-					# system call tracing in operation
+					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -338,6 +338,9 @@ syscall_trace_entry:
 	movl %esp, %eax
 	xorl %edx,%edx
 	call do_syscall_trace
+	cmpl $0, %eax
+	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
+					# so must skip actual syscall
 	movl ORIG_EAX(%esp), %eax
 	cmpl $(nr_syscalls), %eax
 	jnae syscall_call
@@ -504,7 +507,7 @@ label: \
 	pushl $__KERNEL_CS; \
 	pushl $sysenter_past_esp
 
-ENTRY(debug)
+KPROBE_ENTRY(debug)
 	cmpl $sysenter_entry,(%esp)
 	jne debug_stack_correct
 	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
@@ -515,7 +518,7 @@ debug_stack_correct:
 	movl %esp,%eax			# pt_regs pointer
 	call do_debug
 	jmp ret_from_exception
-
+	.previous .text
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
  * a debug fault, and the debug fault hasn't yet been able to
@@ -588,13 +591,14 @@ nmi_16bit_stack:
 	.long 1b,iret_exc
 .previous
 
-ENTRY(int3)
+KPROBE_ENTRY(int3)
 	pushl $-1			# mark this as an int
 	SAVE_ALL
 	xorl %edx,%edx			# zero error code
 	movl %esp,%eax			# pt_regs pointer
 	call do_int3
 	jmp ret_from_exception
+	.previous .text
 
 ENTRY(overflow)
 	pushl $0
@@ -628,17 +632,19 @@ ENTRY(stack_segment)
 	pushl $do_stack_segment
 	jmp error_code
 
-ENTRY(general_protection)
+KPROBE_ENTRY(general_protection)
 	pushl $do_general_protection
 	jmp error_code
+	.previous .text
 
 ENTRY(alignment_check)
 	pushl $do_alignment_check
 	jmp error_code
 
-ENTRY(page_fault)
+KPROBE_ENTRY(page_fault)
 	pushl $do_page_fault
 	jmp error_code
+	.previous .text
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
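
KPROBE_ENTRY emits these handlers into the .kprobes.text section so the kprobes core can refuse to place probes on its own exception paths, and the trailing `.previous .text` returns the assembler to the normal text section. A sketch of the macro, assuming the include/linux/linkage.h definition of this era:

	#ifdef CONFIG_KPROBES
	# define KPROBE_ENTRY(name)		\
		.section .kprobes.text, "ax";	\
		ENTRY(name)
	#else
	# define KPROBE_ENTRY(name)	ENTRY(name)
	#endif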
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 4477bb107098..0480ca9e9e57 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -77,6 +77,32 @@ ENTRY(startup_32)
 	subl %edi,%ecx
 	shrl $2,%ecx
 	rep ; stosl
+/*
+ * Copy bootup parameters out of the way.
+ * Note: %esi still has the pointer to the real-mode data.
+ * With the kexec as boot loader, parameter segment might be loaded beyond
+ * kernel image and might not even be addressable by early boot page tables.
+ * (kexec on panic case). Hence copy out the parameters before initializing
+ * page tables.
+ */
+	movl $(boot_params - __PAGE_OFFSET),%edi
+	movl $(PARAM_SIZE/4),%ecx
+	cld
+	rep
+	movsl
+	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+	andl %esi,%esi
+	jnz 2f			# New command line protocol
+	cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
+	jne 1f
+	movzwl OLD_CL_OFFSET,%esi
+	addl $(OLD_CL_BASE_ADDR),%esi
+2:
+	movl $(saved_command_line - __PAGE_OFFSET),%edi
+	movl $(COMMAND_LINE_SIZE/4),%ecx
+	rep
+	movsl
+1:
 
 /*
  * Initialize page tables. This creates a PDE and a set of page
@@ -214,28 +240,6 @@ ENTRY(startup_32_smp)
  */
 	call setup_idt
 
-/*
- * Copy bootup parameters out of the way.
- * Note: %esi still has the pointer to the real-mode data.
- */
-	movl $boot_params,%edi
-	movl $(PARAM_SIZE/4),%ecx
-	cld
-	rep
-	movsl
-	movl boot_params+NEW_CL_POINTER,%esi
-	andl %esi,%esi
-	jnz 2f			# New command line protocol
-	cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
-	jne 1f
-	movzwl OLD_CL_OFFSET,%esi
-	addl $(OLD_CL_BASE_ADDR),%esi
-2:
-	movl $saved_command_line,%edi
-	movl $(COMMAND_LINE_SIZE/4),%ecx
-	rep
-	movsl
-1:
 checkCPUtype:
 
 	movl $-1,X86_CPUID		#  -1 for no CPUID initially
diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c
new file mode 100644
index 000000000000..c36d1c006c2f
--- /dev/null
+++ b/arch/i386/kernel/i8237.c
@@ -0,0 +1,67 @@
+/*
+ * i8237.c: 8237A DMA controller suspend functions.
+ *
+ * Written by Pierre Ossman, 2005.
+ */
+
+#include <linux/init.h>
+#include <linux/sysdev.h>
+
+#include <asm/dma.h>
+
+/*
+ * This module just handles suspend/resume issues with the
+ * 8237A DMA controller (used for ISA and LPC).
+ * Allocation is handled in kernel/dma.c and normal usage is
+ * in asm/dma.h.
+ */
+
+static int i8237A_resume(struct sys_device *dev)
+{
+	unsigned long flags;
+	int i;
+
+	flags = claim_dma_lock();
+
+	dma_outb(DMA1_RESET_REG, 0);
+	dma_outb(DMA2_RESET_REG, 0);
+
+	for (i = 0;i < 8;i++) {
+		set_dma_addr(i, 0x000000);
+		/* DMA count is a bit weird so this is not 0 */
+		set_dma_count(i, 1);
+	}
+
+	/* Enable cascade DMA or channel 0-3 won't work */
+	enable_dma(4);
+
+	release_dma_lock(flags);
+
+	return 0;
+}
+
+static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return 0;
+}
+
+static struct sysdev_class i8237_sysdev_class = {
+	set_kset_name("i8237"),
+	.suspend = i8237A_suspend,
+	.resume = i8237A_resume,
+};
+
+static struct sys_device device_i8237A = {
+	.id	= 0,
+	.cls	= &i8237_sysdev_class,
+};
+
+static int __init i8237A_init_sysfs(void)
+{
+	int error = sysdev_class_register(&i8237_sysdev_class);
+	if (!error)
+		error = sysdev_register(&device_i8237A);
+	return error;
+}
+
+device_initcall(i8237A_init_sysfs);
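
The resume handler reprograms the controller through the ordinary ISA DMA helpers from asm/dma.h; for contrast, a minimal sketch of the same API as a driver would drive it (channel 3, "mydev", buf_phys and len are all illustrative):

	unsigned long flags;

	if (request_dma(3, "mydev"))
		return -EBUSY;

	flags = claim_dma_lock();
	disable_dma(3);
	clear_dma_ff(3);
	set_dma_mode(3, DMA_MODE_WRITE);	/* memory -> device */
	set_dma_addr(3, buf_phys);		/* bus address of the buffer */
	set_dma_count(3, len);
	enable_dma(3);
	release_dma_lock(flags);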
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index ebedd2e21670..889eda2d7b17 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -33,6 +33,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
+
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
@@ -77,7 +78,7 @@ static struct irq_pin_list {
 	int apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];
 
-int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 #ifdef CONFIG_PCI_MSI
 #define vector_to_irq(vector) 	\
 	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
@@ -222,13 +223,21 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+#ifdef CONFIG_SMP
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 {
 	unsigned long flags;
 	int pin;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 	unsigned int apicid_value;
+	cpumask_t tmp;
 
+	cpus_and(tmp, cpumask, cpu_online_map);
+	if (cpus_empty(tmp))
+		tmp = TARGET_CPUS;
+
+	cpus_and(cpumask, tmp, CPU_MASK_ALL);
+
 	apicid_value = cpu_mask_to_apicid(cpumask);
 	/* Prepare to do the io_apic_write */
 	apicid_value = apicid_value << 24;
@@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 			break;
 		entry = irq_2_pin + entry->next;
 	}
+	set_irq_info(irq, cpumask);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 #  define Dprintk(x...)
 # endif
 
-cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
 
 #define IRQBALANCE_CHECK_ARCH -999
 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
@@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq)
 	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
 	new_cpu = move(cpu, allowed_mask, now, 1);
 	if (cpu != new_cpu) {
-		irq_desc_t *desc = irq_desc + irq;
-		unsigned long flags;
-
-		spin_lock_irqsave(&desc->lock, flags);
-		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
 	}
 }
 
@@ -528,16 +532,12 @@ tryanotherirq:
 	cpus_and(tmp, target_cpu_mask, allowed_mask);
 
 	if (!cpus_empty(tmp)) {
-		irq_desc_t *desc = irq_desc + selected_irq;
-		unsigned long flags;
 
 		Dprintk("irq = %d moved to cpu = %d\n",
 				selected_irq, min_loaded);
 		/* mark for change destination */
-		spin_lock_irqsave(&desc->lock, flags);
-		pending_irq_balance_cpumask[selected_irq] =
-					cpumask_of_cpu(min_loaded);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
+
 		/* Since we made a change, come back sooner to
 		 * check for more variation.
 		 */
@@ -568,7 +568,8 @@ static int balanced_irq(void *unused)
 
 	/* push everything to CPU 0 to give us a starting point.  */
 	for (i = 0 ; i < NR_IRQS ; i++) {
-		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
+		pending_irq_cpumask[i] = cpumask_of_cpu(0);
+		set_pending_irq(i, cpumask_of_cpu(0));
 	}
 
 	for ( ; ; ) {
@@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str)
 
 __setup("noirqbalance", irqbalance_disable);
 
-static inline void move_irq(int irq)
-{
-	/* note - we hold the desc->lock */
-	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
-		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
-		cpus_clear(pending_irq_balance_cpumask[irq]);
-	}
-}
-
 late_initcall(balanced_irq_init);
-
-#else /* !CONFIG_IRQBALANCE */
-static inline void move_irq(int irq) { }
 #endif /* CONFIG_IRQBALANCE */
+#endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP
 void fastcall send_IPI_self(int vector)
@@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
  * we need to reprogram the ioredtbls to cater for the cpus which have come online
  * so mask in all cases should simply be TARGET_CPUS
  */
+#ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
@@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void)
 
 	}
 }
+#endif
 
 /*
  * EISA Edge/Level control register, ELCR
@@ -1127,7 +1119,7 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 
 int assign_irq_vector(int irq)
 {
@@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void)
 		spin_lock_irqsave(&ioapic_lock, flags);
 		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
 		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+		set_native_irq_info(irq, TARGET_CPUS);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 	}
@@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
 {
 	int irq = vector_to_irq(vector);
 
+	move_irq(vector);
 	ack_edge_ioapic_irq(irq);
 }
 
@@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector)
 {
 	int irq = vector_to_irq(vector);
 
+	move_irq(vector);
 	end_level_ioapic_irq(irq);
 }
 
@@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
 	unmask_IO_APIC_irq(irq);
 }
 
+#ifdef CONFIG_SMP
 static void set_ioapic_affinity_vector (unsigned int vector,
 					cpumask_t cpu_mask)
 {
 	int irq = vector_to_irq(vector);
 
+	set_native_irq_info(vector, cpu_mask);
 	set_ioapic_affinity_irq(irq, cpu_mask);
 }
 #endif
+#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1992,7 +1990,7 @@ static void set_ioapic_affinity_vector (unsigned int vector,
  * edge-triggered handler, without risking IRQ storms and other ugly
  * races.
  */
-static struct hw_interrupt_type ioapic_edge_type = {
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
 	.typename 	= "IO-APIC-edge",
 	.startup 	= startup_edge_ioapic,
 	.shutdown 	= shutdown_edge_ioapic,
@@ -2000,10 +1998,12 @@ static struct hw_interrupt_type ioapic_edge_type = {
 	.disable 	= disable_edge_ioapic,
 	.ack 		= ack_edge_ioapic,
 	.end 		= end_edge_ioapic,
+#ifdef CONFIG_SMP
 	.set_affinity 	= set_ioapic_affinity,
+#endif
 };
 
-static struct hw_interrupt_type ioapic_level_type = {
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
 	.typename 	= "IO-APIC-level",
 	.startup 	= startup_level_ioapic,
 	.shutdown 	= shutdown_level_ioapic,
@@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = {
 	.disable 	= disable_level_ioapic,
 	.ack 		= mask_and_ack_level_ioapic,
 	.end 		= end_level_ioapic,
+#ifdef CONFIG_SMP
 	.set_affinity 	= set_ioapic_affinity,
+#endif
 };
 
 static inline void init_IO_APIC_traps(void)
@@ -2074,7 +2076,7 @@ static void ack_lapic_irq (unsigned int irq)
 
 static void end_lapic_irq (unsigned int i) { /* nothing */ }
 
-static struct hw_interrupt_type lapic_irq_type = {
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
 	.typename 	= "local-APIC-edge",
 	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
 	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
@@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 	spin_lock_irqsave(&ioapic_lock, flags);
 	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
 	io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+	set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return 0;
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 8b25160393c1..f2b37654777f 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
 	volatile struct pt_regs * regs = (struct pt_regs *) &unused;
 	unsigned int level = regs->ebx;
 	unsigned int old = (regs->eflags >> 12) & 3;
+	struct thread_struct *t = &current->thread;
 
 	if (level > 3)
 		return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-	regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
-	/* Make sure we return the long way (not sysenter) */
-	set_thread_flag(TIF_IRET);
+	t->iopl = level << 12;
+	regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
+	set_iopl_mask(t->iopl);
 	return 0;
 }
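
Recording the level in thread_struct lets the context-switch path restore a task's IOPL, while set_iopl_mask() splices it into the live EFLAGS immediately. A sketch of such a helper, assuming the include/asm-i386/processor.h definition (shown for illustration only):

	static inline void set_iopl_mask(unsigned mask)
	{
		unsigned int reg;

		__asm__ __volatile__("pushfl;"
				     "popl %0;"
				     "andl %1, %0;"
				     "orl %2, %0;"
				     "pushl %0;"
				     "popfl"
				     : "=&r" (reg)
				     : "i" (~X86_EFLAGS_IOPL), "r" (mask));
	}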
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index a6d8c45961d3..6345b430b105 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -62,32 +62,32 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode)
 	return 0;
 }
 
-int arch_prepare_kprobe(struct kprobe *p)
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	return 0;
 }
 
-void arch_copy_kprobe(struct kprobe *p)
+void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
 	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
 	p->opcode = *p->addr;
 }
 
-void arch_arm_kprobe(struct kprobe *p)
+void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
 	*p->addr = BREAKPOINT_INSTRUCTION;
 	flush_icache_range((unsigned long) p->addr,
 			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 }
 
-void arch_disarm_kprobe(struct kprobe *p)
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
 	*p->addr = p->opcode;
 	flush_icache_range((unsigned long) p->addr,
 			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 }
 
-void arch_remove_kprobe(struct kprobe *p)
+void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 }
 
@@ -127,7 +127,8 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 	regs->eip = (unsigned long)&p->ainsn.insn;
 }
 
-void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+				      struct pt_regs *regs)
 {
 	unsigned long *sara = (unsigned long *)&regs->esp;
 	struct kretprobe_instance *ri;
@@ -150,7 +151,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled thorough out this function.
  */
-static int kprobe_handler(struct pt_regs *regs)
+static int __kprobes kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p;
 	int ret = 0;
@@ -176,7 +177,8 @@ static int kprobe_handler(struct pt_regs *regs)
 		   Disarm the probe we just hit, and ignore it. */
 		p = get_kprobe(addr);
 		if (p) {
-			if (kprobe_status == KPROBE_HIT_SS) {
+			if (kprobe_status == KPROBE_HIT_SS &&
+				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
 				regs->eflags &= ~TF_MASK;
 				regs->eflags |= kprobe_saved_eflags;
 				unlock_kprobes();
@@ -220,7 +222,10 @@ static int kprobe_handler(struct pt_regs *regs)
 			 * either a probepoint or a debugger breakpoint
 			 * at this address.  In either case, no further
 			 * handling of this interrupt is appropriate.
+			 * Back up over the (now missing) int3 and run
+			 * the original instruction.
 			 */
+			regs->eip -= sizeof(kprobe_opcode_t);
 			ret = 1;
 		}
 		/* Not one of ours: let kernel handle it */
@@ -259,7 +264,7 @@ no_kprobe:
 /*
  * Called when we hit the probe point at kretprobe_trampoline
  */
-int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head;
@@ -338,7 +343,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
  * that is atop the stack is the address following the copied instruction.
  * We need to make it the address following the original instruction.
  */
-static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	unsigned long *tos = (unsigned long *)&regs->esp;
 	unsigned long next_eip = 0;
@@ -444,8 +449,8 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
444/* 449/*
445 * Wrapper routine for handling exceptions. 450 * Wrapper routine for handling exceptions.
446 */ 451 */
447int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, 452int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
448 void *data) 453 unsigned long val, void *data)
449{ 454{
450 struct die_args *args = (struct die_args *)data; 455 struct die_args *args = (struct die_args *)data;
451 switch (val) { 456 switch (val) {
@@ -473,7 +478,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
473 return NOTIFY_DONE; 478 return NOTIFY_DONE;
474} 479}
475 480
476int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) 481int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
477{ 482{
478 struct jprobe *jp = container_of(p, struct jprobe, kp); 483 struct jprobe *jp = container_of(p, struct jprobe, kp);
479 unsigned long addr; 484 unsigned long addr;
@@ -495,7 +500,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
495 return 1; 500 return 1;
496} 501}
497 502
498void jprobe_return(void) 503void __kprobes jprobe_return(void)
499{ 504{
500 preempt_enable_no_resched(); 505 preempt_enable_no_resched();
501 asm volatile (" xchgl %%ebx,%%esp \n" 506 asm volatile (" xchgl %%ebx,%%esp \n"
@@ -506,7 +511,7 @@ void jprobe_return(void)
506 (jprobe_saved_esp):"memory"); 511 (jprobe_saved_esp):"memory");
507} 512}
508 513
509int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 514int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
510{ 515{
511 u8 *addr = (u8 *) (regs->eip - 1); 516 u8 *addr = (u8 *) (regs->eip - 1);
512 unsigned long stack_addr = (unsigned long)jprobe_saved_esp; 517 unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
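The __kprobes tag added throughout this file moves the kprobe machinery itself into a dedicated text section (paired with the KPROBES_TEXT entry added to vmlinux.lds.S at the end of this series), so the registration path can refuse probes on the very code that services probes and thereby avoid recursive breakpoints. A minimal sketch of the mechanism, assuming the era's definitions; the range-check helper name here is illustrative:

/* Hedged sketch: __kprobes places a function in .kprobes.text, which
 * the linker script brackets with start/end symbols. */
#define __kprobes __attribute__((__section__(".kprobes.text")))

extern char __kprobes_text_start[], __kprobes_text_end[];

/* Registration can then reject addresses inside the protected range. */
static int in_kprobes_range(unsigned long addr)
{
	return addr >= (unsigned long)__kprobes_text_start &&
	       addr <  (unsigned long)__kprobes_text_end;
}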
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index bb50afbee921..fe1ffa55587d 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -177,7 +177,7 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
177static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) 177static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
178{ 178{
179 struct mm_struct * mm = current->mm; 179 struct mm_struct * mm = current->mm;
180 __u32 entry_1, entry_2, *lp; 180 __u32 entry_1, entry_2;
181 int error; 181 int error;
182 struct user_desc ldt_info; 182 struct user_desc ldt_info;
183 183
@@ -205,8 +205,6 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
205 goto out_unlock; 205 goto out_unlock;
206 } 206 }
207 207
208 lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
209
210 /* Allow LDTs to be cleared by the user. */ 208 /* Allow LDTs to be cleared by the user. */
211 if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { 209 if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
212 if (oldmode || LDT_empty(&ldt_info)) { 210 if (oldmode || LDT_empty(&ldt_info)) {
@@ -223,8 +221,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
223 221
224 /* Install the new entry ... */ 222 /* Install the new entry ... */
225install: 223install:
226 *lp = entry_1; 224 write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
227 *(lp+1) = entry_2;
228 error = 0; 225 error = 0;
229 226
230out_unlock: 227out_unlock:
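Dropping the raw lp pointer in favor of write_ldt_entry() keeps knowledge of the descriptor layout in one place. A hedged sketch of what the helper boils down to on this architecture (function name and body are illustrative, not the header's exact macro): an LDT slot is 8 bytes, written as two 32-bit words:

static inline void write_ldt_entry_sketch(void *ldt, int entry,
					  __u32 entry_1, __u32 entry_2)
{
	__u32 *lp = (__u32 *)((char *)ldt + (entry << 3));

	lp[0] = entry_1;	/* low word: limit 15:0, base 15:0 */
	lp[1] = entry_2;	/* high word: base bits, type, flags */
}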
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index cb699a2aa1f8..a912fed48482 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -17,13 +17,7 @@
17#include <asm/apic.h> 17#include <asm/apic.h>
18#include <asm/cpufeature.h> 18#include <asm/cpufeature.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20 20#include <asm/system.h>
21static inline unsigned long read_cr3(void)
22{
23 unsigned long cr3;
24 asm volatile("movl %%cr3,%0": "=r"(cr3));
25 return cr3;
26}
27 21
28#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) 22#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
29 23
@@ -99,10 +93,7 @@ static void set_idt(void *newidt, __u16 limit)
99 curidt.size = limit; 93 curidt.size = limit;
100 curidt.address = (unsigned long)newidt; 94 curidt.address = (unsigned long)newidt;
101 95
102 __asm__ __volatile__ ( 96 load_idt(&curidt);
103 "lidtl %0\n"
104 : : "m" (curidt)
105 );
106}; 97};
107 98
108 99
@@ -114,10 +105,7 @@ static void set_gdt(void *newgdt, __u16 limit)
114 curgdt.size = limit; 105 curgdt.size = limit;
115 curgdt.address = (unsigned long)newgdt; 106 curgdt.address = (unsigned long)newgdt;
116 107
117 __asm__ __volatile__ ( 108 load_gdt(&curgdt);
118 "lgdtl %0\n"
119 : : "m" (curgdt)
120 );
121}; 109};
122 110
123static void load_segments(void) 111static void load_segments(void)
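The kexec path now pulls load_idt()/load_gdt() from <asm/system.h> instead of repeating lidtl/lgdtl inline asm; reboot.c and traps.c below get the same substitution. Assuming the accessors expand to essentially the asm being deleted, a sketch:

struct Xgt_desc_struct {
	unsigned short size;
	unsigned long address __attribute__((packed));
};

#define load_gdt(dtr) __asm__ __volatile__("lgdt %0" : : "m" (*(dtr)))
#define load_idt(dtr) __asm__ __volatile__("lidt %0" : : "m" (*(dtr)))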
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index a77c612aad00..165f13158c60 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -164,7 +164,8 @@ static void collect_cpu_info (void *unused)
164 } 164 }
165 165
166 wrmsr(MSR_IA32_UCODE_REV, 0, 0); 166 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
167 __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); 167 /* see notes above for revision 1.07. Apparent chip bug */
168 serialize_cpu();
168 /* get the current revision from MSR 0x8B */ 169 /* get the current revision from MSR 0x8B */
169 rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); 170 rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
170 pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", 171 pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
@@ -377,7 +378,9 @@ static void do_update_one (void * unused)
377 (unsigned long) uci->mc->bits >> 16 >> 16); 378 (unsigned long) uci->mc->bits >> 16 >> 16);
378 wrmsr(MSR_IA32_UCODE_REV, 0, 0); 379 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
379 380
380 __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); 381 /* see notes above for revision 1.07. Apparent chip bug */
382 serialize_cpu();
383
381 /* get the current revision from MSR 0x8B */ 384 /* get the current revision from MSR 0x8B */
382 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); 385 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
383 386
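Both microcode call sites used cpuid purely for its serializing side effect, as the update procedure requires; serialize_cpu() names that intent. A hedged sketch of such a helper (the exact in-tree definition may differ in register constraints):

static inline void serialize_cpu(void)
{
	unsigned int eax = 0, ebx, ecx, edx;

	/* cpuid is an architecturally serializing instruction */
	__asm__ __volatile__("cpuid"
			     : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
			     : : "memory");
}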
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 97dbf289dbd5..cafaeffe3818 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -65,6 +65,8 @@ int nr_ioapics;
65int pic_mode; 65int pic_mode;
66unsigned long mp_lapic_addr; 66unsigned long mp_lapic_addr;
67 67
68unsigned int def_to_bigsmp = 0;
69
68/* Processor that is doing the boot up */ 70/* Processor that is doing the boot up */
69unsigned int boot_cpu_physical_apicid = -1U; 71unsigned int boot_cpu_physical_apicid = -1U;
70/* Internal processor count */ 72/* Internal processor count */
@@ -120,7 +122,7 @@ static int MP_valid_apicid(int apicid, int version)
120 122
121static void __init MP_processor_info (struct mpc_config_processor *m) 123static void __init MP_processor_info (struct mpc_config_processor *m)
122{ 124{
123 int ver, apicid; 125 int ver, apicid, cpu, found_bsp = 0;
124 physid_mask_t tmp; 126 physid_mask_t tmp;
125 127
126 if (!(m->mpc_cpuflag & CPU_ENABLED)) 128 if (!(m->mpc_cpuflag & CPU_ENABLED))
@@ -179,6 +181,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
179 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { 181 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
180 Dprintk(" Bootup CPU\n"); 182 Dprintk(" Bootup CPU\n");
181 boot_cpu_physical_apicid = m->mpc_apicid; 183 boot_cpu_physical_apicid = m->mpc_apicid;
184 found_bsp = 1;
182 } 185 }
183 186
184 if (num_processors >= NR_CPUS) { 187 if (num_processors >= NR_CPUS) {
@@ -202,6 +205,11 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
202 return; 205 return;
203 } 206 }
204 207
208 if (found_bsp)
209 cpu = 0;
210 else
211 cpu = num_processors - 1;
212 cpu_set(cpu, cpu_possible_map);
205 tmp = apicid_to_cpu_present(apicid); 213 tmp = apicid_to_cpu_present(apicid);
206 physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp); 214 physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
207 215
@@ -213,6 +221,13 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
213 ver = 0x10; 221 ver = 0x10;
214 } 222 }
215 apic_version[m->mpc_apicid] = ver; 223 apic_version[m->mpc_apicid] = ver;
224 if ((num_processors > 8) &&
225 APIC_XAPIC(ver) &&
226 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
227 def_to_bigsmp = 1;
228 else
229 def_to_bigsmp = 0;
230
216 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; 231 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
217} 232}
218 233
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index b2f03c39a6fe..03100d6fc5d6 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -46,23 +46,13 @@
46 46
47static struct class *msr_class; 47static struct class *msr_class;
48 48
49/* Note: "err" is handled in a funny way below. Otherwise one version
50 of gcc or another breaks. */
51
52static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx) 49static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
53{ 50{
54 int err; 51 int err;
55 52
56 asm volatile ("1: wrmsr\n" 53 err = wrmsr_safe(reg, eax, edx);
57 "2:\n" 54 if (err)
58 ".section .fixup,\"ax\"\n" 55 err = -EIO;
59 "3: movl %4,%0\n"
60 " jmp 2b\n"
61 ".previous\n"
62 ".section __ex_table,\"a\"\n"
63 " .align 4\n" " .long 1b,3b\n" ".previous":"=&bDS" (err)
64 :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
65
66 return err; 56 return err;
67} 57}
68 58
@@ -70,18 +60,9 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
70{ 60{
71 int err; 61 int err;
72 62
73 asm volatile ("1: rdmsr\n" 63 err = rdmsr_safe(reg, eax, edx);
74 "2:\n" 64 if (err)
75 ".section .fixup,\"ax\"\n" 65 err = -EIO;
76 "3: movl %4,%0\n"
77 " jmp 2b\n"
78 ".previous\n"
79 ".section __ex_table,\"a\"\n"
80 " .align 4\n"
81 " .long 1b,3b\n"
82 ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
83 :"c"(reg), "i"(-EIO), "0"(0));
84
85 return err; 66 return err;
86} 67}
87 68
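rdmsr_safe()/wrmsr_safe() carry their own exception-table fixups and return nonzero when the MSR access faults, so the character device no longer needs hand-rolled .fixup/__ex_table asm and merely maps failure to -EIO. A hedged usage sketch (helper name illustrative):

static int msr_read_or_eio(u32 reg, u32 *lo, u32 *hi)
{
	if (rdmsr_safe(reg, lo, hi))
		return -EIO;	/* the rdmsr took a #GP and was fixed up */
	return 0;
}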
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 8c242bb1ef45..0178457db721 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -478,6 +478,11 @@ void touch_nmi_watchdog (void)
478 */ 478 */
479 for (i = 0; i < NR_CPUS; i++) 479 for (i = 0; i < NR_CPUS; i++)
480 alert_counter[i] = 0; 480 alert_counter[i] = 0;
481
482 /*
483 * Tickle the softlockup detector too:
484 */
485 touch_softlockup_watchdog();
481} 486}
482 487
483extern void die_nmi(struct pt_regs *, const char *msg); 488extern void die_nmi(struct pt_regs *, const char *msg);
@@ -501,8 +506,11 @@ void nmi_watchdog_tick (struct pt_regs * regs)
501 */ 506 */
502 alert_counter[cpu]++; 507 alert_counter[cpu]++;
503 if (alert_counter[cpu] == 5*nmi_hz) 508 if (alert_counter[cpu] == 5*nmi_hz)
509 /*
510 * die_nmi will return ONLY if NOTIFY_STOP happens..
511 */
504 die_nmi(regs, "NMI Watchdog detected LOCKUP"); 512 die_nmi(regs, "NMI Watchdog detected LOCKUP");
505 } else { 513
506 last_irq_sums[cpu] = sum; 514 last_irq_sums[cpu] = sum;
507 alert_counter[cpu] = 0; 515 alert_counter[cpu] = 0;
508 } 516 }
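With touch_nmi_watchdog() also tickling the softlockup detector, code that legitimately spins with interrupts off needs only the one call. A hedged usage sketch; device_ready() is a hypothetical stand-in for whatever condition is being polled:

	while (!device_ready()) {
		cpu_relax();
		touch_nmi_watchdog();	/* resets both watchdogs now */
	}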
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index e3f362e8af5b..b45cbf93d439 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -164,7 +164,7 @@ static inline void play_dead(void)
164 */ 164 */
165 local_irq_disable(); 165 local_irq_disable();
166 while (1) 166 while (1)
167 __asm__ __volatile__("hlt":::"memory"); 167 halt();
168} 168}
169#else 169#else
170static inline void play_dead(void) 170static inline void play_dead(void)
@@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs)
313 printk(" DS: %04x ES: %04x\n", 313 printk(" DS: %04x ES: %04x\n",
314 0xffff & regs->xds,0xffff & regs->xes); 314 0xffff & regs->xds,0xffff & regs->xes);
315 315
316 __asm__("movl %%cr0, %0": "=r" (cr0)); 316 cr0 = read_cr0();
317 __asm__("movl %%cr2, %0": "=r" (cr2)); 317 cr2 = read_cr2();
318 __asm__("movl %%cr3, %0": "=r" (cr3)); 318 cr3 = read_cr3();
319 /* This could fault if %cr4 does not exist */ 319 if (current_cpu_data.x86 > 4) {
320 __asm__("1: movl %%cr4, %0 \n" 320 cr4 = read_cr4();
321 "2: \n" 321 }
322 ".section __ex_table,\"a\" \n"
323 ".long 1b,2b \n"
324 ".previous \n"
325 : "=r" (cr4): "0" (0));
326 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); 322 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
327 show_trace(NULL, &regs->esp); 323 show_trace(NULL, &regs->esp);
328} 324}
@@ -682,21 +678,26 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
682 __unlazy_fpu(prev_p); 678 __unlazy_fpu(prev_p);
683 679
684 /* 680 /*
685 * Reload esp0, LDT and the page table pointer: 681 * Reload esp0.
686 */ 682 */
687 load_esp0(tss, next); 683 load_esp0(tss, next);
688 684
689 /* 685 /*
690 * Load the per-thread Thread-Local Storage descriptor. 686 * Save away %fs and %gs. No need to save %es and %ds, as
687 * those are always kernel segments while inside the kernel.
688 * Doing this before setting the new TLS descriptors avoids
689 * the situation where we temporarily have non-reloadable
690 * segments in %fs and %gs. This could be an issue if the
691 * NMI handler ever used %fs or %gs (it does not today), or
692 * if the kernel is running inside of a hypervisor layer.
691 */ 693 */
692 load_TLS(next, cpu); 694 savesegment(fs, prev->fs);
695 savesegment(gs, prev->gs);
693 696
694 /* 697 /*
695 * Save away %fs and %gs. No need to save %es and %ds, as 698 * Load the per-thread Thread-Local Storage descriptor.
696 * those are always kernel segments while inside the kernel.
697 */ 699 */
698 asm volatile("mov %%fs,%0":"=m" (prev->fs)); 700 load_TLS(next, cpu);
699 asm volatile("mov %%gs,%0":"=m" (prev->gs));
700 701
701 /* 702 /*
702 * Restore %fs and %gs if needed. 703 * Restore %fs and %gs if needed.
@@ -711,6 +712,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
711 loadsegment(gs, next->gs); 712 loadsegment(gs, next->gs);
712 713
713 /* 714 /*
715 * Restore IOPL if needed.
716 */
717 if (unlikely(prev->iopl != next->iopl))
718 set_iopl_mask(next->iopl);
719
720 /*
714 * Now maybe reload the debug registers 721 * Now maybe reload the debug registers
715 */ 722 */
716 if (unlikely(next->debugreg[7])) { 723 if (unlikely(next->debugreg[7])) {
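Reordering __switch_to() so %fs/%gs are saved before load_TLS() ensures the CPU never briefly holds selectors for descriptors the TLS reload has already rewritten. The savesegment() accessor is presumably just the open-coded asm this diff removes, i.e. something like:

#define savesegment(seg, value) \
	asm volatile("mov %%" #seg ",%0" : "=m" (value))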
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 0da59b42843c..340980203b09 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -271,6 +271,8 @@ static void clear_singlestep(struct task_struct *child)
271void ptrace_disable(struct task_struct *child) 271void ptrace_disable(struct task_struct *child)
272{ 272{
273 clear_singlestep(child); 273 clear_singlestep(child);
274 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
275 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
274} 276}
275 277
276/* 278/*
@@ -509,15 +511,20 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
509 } 511 }
510 break; 512 break;
511 513
514 case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
512 case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ 515 case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
513 case PTRACE_CONT: /* restart after signal. */ 516 case PTRACE_CONT: /* restart after signal. */
514 ret = -EIO; 517 ret = -EIO;
515 if (!valid_signal(data)) 518 if (!valid_signal(data))
516 break; 519 break;
517 if (request == PTRACE_SYSCALL) { 520 if (request == PTRACE_SYSEMU) {
521 set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
522 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
523 } else if (request == PTRACE_SYSCALL) {
518 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 524 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
519 } 525 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
520 else { 526 } else {
527 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
521 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 528 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
522 } 529 }
523 child->exit_code = data; 530 child->exit_code = data;
@@ -542,10 +549,17 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
542 wake_up_process(child); 549 wake_up_process(child);
543 break; 550 break;
544 551
552 case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */
545 case PTRACE_SINGLESTEP: /* set the trap flag. */ 553 case PTRACE_SINGLESTEP: /* set the trap flag. */
546 ret = -EIO; 554 ret = -EIO;
547 if (!valid_signal(data)) 555 if (!valid_signal(data))
548 break; 556 break;
557
558 if (request == PTRACE_SYSEMU_SINGLESTEP)
559 set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
560 else
561 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
562
549 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 563 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
550 set_singlestep(child); 564 set_singlestep(child);
551 child->exit_code = data; 565 child->exit_code = data;
@@ -678,26 +692,52 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
678 * - triggered by current->work.syscall_trace 692 * - triggered by current->work.syscall_trace
679 */ 693 */
680__attribute__((regparm(3))) 694__attribute__((regparm(3)))
681void do_syscall_trace(struct pt_regs *regs, int entryexit) 695int do_syscall_trace(struct pt_regs *regs, int entryexit)
682{ 696{
697 int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU), ret = 0;
698 /* With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
699 * interception. */
700 int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
701
683 /* do the secure computing check first */ 702 /* do the secure computing check first */
684 secure_computing(regs->orig_eax); 703 secure_computing(regs->orig_eax);
685 704
686 if (unlikely(current->audit_context) && entryexit) 705 if (unlikely(current->audit_context)) {
687 audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); 706 if (entryexit)
707 audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
708 /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
709 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
710 * not used, entry.S will call us only on syscall exit, not
711 * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
712 * calling send_sigtrap() on syscall entry.
713 *
714 * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
715 * is_singlestep is false, despite its name, so we will still do
716 * the correct thing.
717 */
718 else if (is_singlestep)
719 goto out;
720 }
688 721
689 if (!(current->ptrace & PT_PTRACED)) 722 if (!(current->ptrace & PT_PTRACED))
690 goto out; 723 goto out;
691 724
725 /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
726 * and then is resumed with SYSEMU_SINGLESTEP, it will come in
727 * here. We have to check this and return */
728 if (is_sysemu && entryexit)
729 return 0;
730
692 /* Fake a debug trap */ 731 /* Fake a debug trap */
693 if (test_thread_flag(TIF_SINGLESTEP)) 732 if (is_singlestep)
694 send_sigtrap(current, regs, 0); 733 send_sigtrap(current, regs, 0);
695 734
696 if (!test_thread_flag(TIF_SYSCALL_TRACE)) 735 if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
697 goto out; 736 goto out;
698 737
699 /* the 0x80 provides a way for the tracing parent to distinguish 738 /* the 0x80 provides a way for the tracing parent to distinguish
700 between a syscall stop and SIGTRAP delivery */ 739 between a syscall stop and SIGTRAP delivery */
740 /* Note that the debugger could change the result of test_thread_flag! */
701 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); 741 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
702 742
703 /* 743 /*
@@ -709,9 +749,16 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit)
709 send_sig(current->exit_code, current, 1); 749 send_sig(current->exit_code, current, 1);
710 current->exit_code = 0; 750 current->exit_code = 0;
711 } 751 }
752 ret = is_sysemu;
712 out: 753 out:
713 if (unlikely(current->audit_context) && !entryexit) 754 if (unlikely(current->audit_context) && !entryexit)
714 audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, 755 audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax,
715 regs->ebx, regs->ecx, regs->edx, regs->esi); 756 regs->ebx, regs->ecx, regs->edx, regs->esi);
757 if (ret == 0)
758 return 0;
716 759
760 regs->orig_eax = -1; /* force skip of syscall restarting */
761 if (unlikely(current->audit_context))
762 audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
763 return 1;
717} 764}
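PTRACE_SYSEMU stops the child at syscall entry and, via do_syscall_trace() returning 1, makes entry.S skip the syscall entirely, so an emulator such as UML takes one stop per syscall instead of the two that PTRACE_SYSCALL costs. A hedged userspace sketch, assuming the new request constant is visible to userspace; emulate_syscall() is a hypothetical helper that would read the child's registers and plant a result:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

static void supervise(pid_t child)
{
	int status;

	for (;;) {
		if (ptrace(PTRACE_SYSEMU, child, 0, 0) < 0)
			break;
		if (waitpid(child, &status, 0) < 0 || !WIFSTOPPED(status))
			break;
		emulate_syscall(child);	/* hypothetical: regs in, result out */
	}
}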
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index c71fef31dc47..1cbb9c0f4704 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -13,6 +13,7 @@
13#include <linux/dmi.h> 13#include <linux/dmi.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <asm/apic.h> 15#include <asm/apic.h>
16#include <asm/desc.h>
16#include "mach_reboot.h" 17#include "mach_reboot.h"
17#include <linux/reboot_fixups.h> 18#include <linux/reboot_fixups.h>
18 19
@@ -242,13 +243,13 @@ void machine_real_restart(unsigned char *code, int length)
242 243
243 /* Set up the IDT for real mode. */ 244 /* Set up the IDT for real mode. */
244 245
245 __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt)); 246 load_idt(&real_mode_idt);
246 247
247 /* Set up a GDT from which we can load segment descriptors for real 248 /* Set up a GDT from which we can load segment descriptors for real
248 mode. The GDT is not used in real mode; it is just needed here to 249 mode. The GDT is not used in real mode; it is just needed here to
249 prepare the descriptors. */ 250 prepare the descriptors. */
250 251
251 __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt)); 252 load_gdt(&real_mode_gdt);
252 253
253 /* Load the data segment registers, and thus the descriptors ready for 254 /* Load the data segment registers, and thus the descriptors ready for
254 real mode. The base address of each segment is 0x100, 16 times the 255 real mode. The base address of each segment is 0x100, 16 times the
@@ -316,7 +317,7 @@ void machine_emergency_restart(void)
316 if (!reboot_thru_bios) { 317 if (!reboot_thru_bios) {
317 if (efi_enabled) { 318 if (efi_enabled) {
318 efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); 319 efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
319 __asm__ __volatile__("lidt %0": :"m" (no_idt)); 320 load_idt(&no_idt);
320 __asm__ __volatile__("int3"); 321 __asm__ __volatile__("int3");
321 } 322 }
322 /* rebooting needs to touch the page at absolute addr 0 */ 323 /* rebooting needs to touch the page at absolute addr 0 */
@@ -325,7 +326,7 @@ void machine_emergency_restart(void)
325 mach_reboot_fixups(); /* for board specific fixups */ 326 mach_reboot_fixups(); /* for board specific fixups */
326 mach_reboot(); 327 mach_reboot();
327 /* That didn't work - force a triple fault.. */ 328 /* That didn't work - force a triple fault.. */
328 __asm__ __volatile__("lidt %0": :"m" (no_idt)); 329 load_idt(&no_idt);
329 __asm__ __volatile__("int3"); 330 __asm__ __volatile__("int3");
330 } 331 }
331 } 332 }
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
index 469f496e55c0..7455ab643943 100644
--- a/arch/i386/kernel/semaphore.c
+++ b/arch/i386/kernel/semaphore.c
@@ -13,171 +13,9 @@
13 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> 13 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
14 */ 14 */
15#include <linux/config.h> 15#include <linux/config.h>
16#include <linux/sched.h>
17#include <linux/err.h>
18#include <linux/init.h>
19#include <asm/semaphore.h> 16#include <asm/semaphore.h>
20 17
21/* 18/*
22 * Semaphores are implemented using a two-way counter:
23 * The "count" variable is decremented for each process
24 * that tries to acquire the semaphore, while the "sleeping"
25 * variable is a count of such acquires.
26 *
27 * Notably, the inline "up()" and "down()" functions can
28 * efficiently test if they need to do any extra work (up
29 * needs to do something only if count was negative before
30 * the increment operation).
31 *
32 * "sleeping" and the contention routine ordering is protected
33 * by the spinlock in the semaphore's waitqueue head.
34 *
35 * Note that these functions are only called when there is
36 * contention on the lock, and as such all this is the
37 * "non-critical" part of the whole semaphore business. The
38 * critical part is the inline stuff in <asm/semaphore.h>
39 * where we want to avoid any extra jumps and calls.
40 */
41
42/*
43 * Logic:
44 * - only on a boundary condition do we need to care. When we go
45 * from a negative count to a non-negative, we wake people up.
46 * - when we go from a non-negative count to a negative do we
47 * (a) synchronize with the "sleeper" count and (b) make sure
48 * that we're on the wakeup list before we synchronize so that
49 * we cannot lose wakeup events.
50 */
51
52static fastcall void __attribute_used__ __up(struct semaphore *sem)
53{
54 wake_up(&sem->wait);
55}
56
57static fastcall void __attribute_used__ __sched __down(struct semaphore * sem)
58{
59 struct task_struct *tsk = current;
60 DECLARE_WAITQUEUE(wait, tsk);
61 unsigned long flags;
62
63 tsk->state = TASK_UNINTERRUPTIBLE;
64 spin_lock_irqsave(&sem->wait.lock, flags);
65 add_wait_queue_exclusive_locked(&sem->wait, &wait);
66
67 sem->sleepers++;
68 for (;;) {
69 int sleepers = sem->sleepers;
70
71 /*
72 * Add "everybody else" into it. They aren't
73 * playing, because we own the spinlock in
74 * the wait_queue_head.
75 */
76 if (!atomic_add_negative(sleepers - 1, &sem->count)) {
77 sem->sleepers = 0;
78 break;
79 }
80 sem->sleepers = 1; /* us - see -1 above */
81 spin_unlock_irqrestore(&sem->wait.lock, flags);
82
83 schedule();
84
85 spin_lock_irqsave(&sem->wait.lock, flags);
86 tsk->state = TASK_UNINTERRUPTIBLE;
87 }
88 remove_wait_queue_locked(&sem->wait, &wait);
89 wake_up_locked(&sem->wait);
90 spin_unlock_irqrestore(&sem->wait.lock, flags);
91 tsk->state = TASK_RUNNING;
92}
93
94static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem)
95{
96 int retval = 0;
97 struct task_struct *tsk = current;
98 DECLARE_WAITQUEUE(wait, tsk);
99 unsigned long flags;
100
101 tsk->state = TASK_INTERRUPTIBLE;
102 spin_lock_irqsave(&sem->wait.lock, flags);
103 add_wait_queue_exclusive_locked(&sem->wait, &wait);
104
105 sem->sleepers++;
106 for (;;) {
107 int sleepers = sem->sleepers;
108
109 /*
110 * With signals pending, this turns into
111 * the trylock failure case - we won't be
112 * sleeping, and we can't get the lock as
113 * it has contention. Just correct the count
114 * and exit.
115 */
116 if (signal_pending(current)) {
117 retval = -EINTR;
118 sem->sleepers = 0;
119 atomic_add(sleepers, &sem->count);
120 break;
121 }
122
123 /*
124 * Add "everybody else" into it. They aren't
125 * playing, because we own the spinlock in
126 * wait_queue_head. The "-1" is because we're
127 * still hoping to get the semaphore.
128 */
129 if (!atomic_add_negative(sleepers - 1, &sem->count)) {
130 sem->sleepers = 0;
131 break;
132 }
133 sem->sleepers = 1; /* us - see -1 above */
134 spin_unlock_irqrestore(&sem->wait.lock, flags);
135
136 schedule();
137
138 spin_lock_irqsave(&sem->wait.lock, flags);
139 tsk->state = TASK_INTERRUPTIBLE;
140 }
141 remove_wait_queue_locked(&sem->wait, &wait);
142 wake_up_locked(&sem->wait);
143 spin_unlock_irqrestore(&sem->wait.lock, flags);
144
145 tsk->state = TASK_RUNNING;
146 return retval;
147}
148
149/*
150 * Trylock failed - make sure we correct for
151 * having decremented the count.
152 *
153 * We could have done the trylock with a
154 * single "cmpxchg" without failure cases,
155 * but then it wouldn't work on a 386.
156 */
157static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem)
158{
159 int sleepers;
160 unsigned long flags;
161
162 spin_lock_irqsave(&sem->wait.lock, flags);
163 sleepers = sem->sleepers + 1;
164 sem->sleepers = 0;
165
166 /*
167 * Add "everybody else" and us into it. They aren't
168 * playing, because we own the spinlock in the
169 * wait_queue_head.
170 */
171 if (!atomic_add_negative(sleepers, &sem->count)) {
172 wake_up_locked(&sem->wait);
173 }
174
175 spin_unlock_irqrestore(&sem->wait.lock, flags);
176 return 1;
177}
178
179
180/*
181 * The semaphore operations have a special calling sequence that 19 * The semaphore operations have a special calling sequence that
182 * allows us to do a simpler in-line version of them. These routines 20 * allows us to do a simpler in-line version of them. These routines
183 * need to convert that sequence back into the C sequence when 21 * need to convert that sequence back into the C sequence when
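The C contention paths deleted above (only the asm calling-convention stubs remain in this file) hinge on atomic_add_negative(): "count" is the semaphore value, while "sleepers" compensates for tasks that decremented count and then slept. A hedged illustration with a plain int standing in for atomic_t:

static int add_negative_sketch(int i, int *v)
{
	*v += i;		/* atomic in the real primitive */
	return *v < 0;
}

/* One holder, one sleeper: count == -1, sleepers == 1. The sleeper
 * folds "everybody else" in with add_negative_sketch(sleepers - 1,
 * &count); the sum stays -1 (negative), so it sleeps again. After
 * up() raises count to 0, the same call returns 0 and the sleeper
 * owns the semaphore with count back at 0 (held, no waiters). */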
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index d52eda399a7a..a659d274914c 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -82,7 +82,7 @@ EXPORT_SYMBOL(efi_enabled);
82/* cpu data as detected by the assembly code in head.S */ 82/* cpu data as detected by the assembly code in head.S */
83struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; 83struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
84/* common cpu data for all cpus */ 84/* common cpu data for all cpus */
85struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; 85struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
86EXPORT_SYMBOL(boot_cpu_data); 86EXPORT_SYMBOL(boot_cpu_data);
87 87
88unsigned long mmu_cr4_features; 88unsigned long mmu_cr4_features;
@@ -370,12 +370,16 @@ static void __init limit_regions(unsigned long long size)
370 int i; 370 int i;
371 371
372 if (efi_enabled) { 372 if (efi_enabled) {
373 for (i = 0; i < memmap.nr_map; i++) { 373 efi_memory_desc_t *md;
374 current_addr = memmap.map[i].phys_addr + 374 void *p;
375 (memmap.map[i].num_pages << 12); 375
376 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) { 376 for (p = memmap.map, i = 0; p < memmap.map_end;
377 p += memmap.desc_size, i++) {
378 md = p;
379 current_addr = md->phys_addr + (md->num_pages << 12);
380 if (md->type == EFI_CONVENTIONAL_MEMORY) {
377 if (current_addr >= size) { 381 if (current_addr >= size) {
378 memmap.map[i].num_pages -= 382 md->num_pages -=
379 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); 383 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
380 memmap.nr_map = i + 1; 384 memmap.nr_map = i + 1;
381 return; 385 return;
@@ -1581,8 +1585,14 @@ void __init setup_arch(char **cmdline_p)
1581 */ 1585 */
1582 acpi_boot_table_init(); 1586 acpi_boot_table_init();
1583 acpi_boot_init(); 1587 acpi_boot_init();
1584#endif
1585 1588
1589#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1590 if (def_to_bigsmp)
1591 printk(KERN_WARNING "More than 8 CPUs detected and "
1592 "CONFIG_X86_PC cannot handle it.\nUse "
1593 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1594#endif
1595#endif
1586#ifdef CONFIG_X86_LOCAL_APIC 1596#ifdef CONFIG_X86_LOCAL_APIC
1587 if (smp_found_config) 1597 if (smp_found_config)
1588 get_smp_config(); 1598 get_smp_config();
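limit_regions() previously indexed memmap.map as a C array, which silently breaks when the firmware's descriptor stride differs from sizeof(efi_memory_desc_t); stepping by memmap.desc_size is the required idiom. The traversal in isolation:

	efi_memory_desc_t *md;
	void *p;

	/* descriptors are desc_size bytes apart, not sizeof(*md) */
	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
		md = p;
		/* examine md->type, md->phys_addr, md->num_pages ... */
	}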
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 140e340569c6..61eb0c8a6e47 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -278,9 +278,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
278 int tmp, err = 0; 278 int tmp, err = 0;
279 279
280 tmp = 0; 280 tmp = 0;
281 __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); 281 savesegment(gs, tmp);
282 err |= __put_user(tmp, (unsigned int __user *)&sc->gs); 282 err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
283 __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); 283 savesegment(fs, tmp);
284 err |= __put_user(tmp, (unsigned int __user *)&sc->fs); 284 err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
285 285
286 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); 286 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
@@ -604,7 +604,9 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
604 * We want the common case to go fast, which 604 * We want the common case to go fast, which
605 * is why we may in certain cases get here from 605 * is why we may in certain cases get here from
606 * kernel mode. Just return without doing anything 606 * kernel mode. Just return without doing anything
607 * if so. 607 * if so. vm86 regs switched out by assembly code
608 * before reaching here, so testing against kernel
609 * CS suffices.
608 */ 610 */
609 if (!user_mode(regs)) 611 if (!user_mode(regs))
610 return 1; 612 return 1;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index cec4bde67161..48b55db3680f 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy)
576 local_irq_disable(); 576 local_irq_disable();
577 disable_local_APIC(); 577 disable_local_APIC();
578 if (cpu_data[smp_processor_id()].hlt_works_ok) 578 if (cpu_data[smp_processor_id()].hlt_works_ok)
579 for(;;) __asm__("hlt"); 579 for(;;) halt();
580 for (;;); 580 for (;;);
581} 581}
582 582
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8ac8e9fd5614..5e4893d2b9f2 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -88,6 +88,8 @@ EXPORT_SYMBOL(cpu_online_map);
88cpumask_t cpu_callin_map; 88cpumask_t cpu_callin_map;
89cpumask_t cpu_callout_map; 89cpumask_t cpu_callout_map;
90EXPORT_SYMBOL(cpu_callout_map); 90EXPORT_SYMBOL(cpu_callout_map);
91cpumask_t cpu_possible_map;
92EXPORT_SYMBOL(cpu_possible_map);
91static cpumask_t smp_commenced_mask; 93static cpumask_t smp_commenced_mask;
92 94
93/* TSC's upper 32 bits can't be written in earlier CPU (before prescott), there 95/* TSC's upper 32 bits can't be written in earlier CPU (before prescott), there
@@ -1017,8 +1019,8 @@ int __devinit smp_prepare_cpu(int cpu)
1017 tsc_sync_disabled = 1; 1019 tsc_sync_disabled = 1;
1018 1020
1019 /* init low mem mapping */ 1021 /* init low mem mapping */
1020 memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 1022 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
1021 sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); 1023 KERNEL_PGD_PTRS);
1022 flush_tlb_all(); 1024 flush_tlb_all();
1023 schedule_work(&task); 1025 schedule_work(&task);
1024 wait_for_completion(&done); 1026 wait_for_completion(&done);
@@ -1265,6 +1267,7 @@ void __devinit smp_prepare_boot_cpu(void)
1265 cpu_set(smp_processor_id(), cpu_online_map); 1267 cpu_set(smp_processor_id(), cpu_online_map);
1266 cpu_set(smp_processor_id(), cpu_callout_map); 1268 cpu_set(smp_processor_id(), cpu_callout_map);
1267 cpu_set(smp_processor_id(), cpu_present_map); 1269 cpu_set(smp_processor_id(), cpu_present_map);
1270 cpu_set(smp_processor_id(), cpu_possible_map);
1268 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 1271 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
1269} 1272}
1270 1273
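clone_pgd_range() states the copy in page-directory entries instead of bytes, which is harder to get wrong than the memcpy arithmetic it replaces. Assuming the wrapper reduces to the obvious thing:

static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
	memcpy(dst, src, count * sizeof(pgd_t));
}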
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 0ee9dee8af06..eefea7c55008 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -194,10 +194,7 @@ int do_settimeofday(struct timespec *tv)
194 set_normalized_timespec(&xtime, sec, nsec); 194 set_normalized_timespec(&xtime, sec, nsec);
195 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); 195 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
196 196
197 time_adjust = 0; /* stop active adjtime() */ 197 ntp_clear();
198 time_status |= STA_UNSYNC;
199 time_maxerror = NTP_PHASE_LIMIT;
200 time_esterror = NTP_PHASE_LIMIT;
201 write_sequnlock_irq(&xtime_lock); 198 write_sequnlock_irq(&xtime_lock);
202 clock_was_set(); 199 clock_was_set();
203 return 0; 200 return 0;
@@ -252,8 +249,7 @@ EXPORT_SYMBOL(profile_pc);
252 * timer_interrupt() needs to keep up the real-time clock, 249 * timer_interrupt() needs to keep up the real-time clock,
253 * as well as call the "do_timer()" routine every clocktick 250 * as well as call the "do_timer()" routine every clocktick
254 */ 251 */
255static inline void do_timer_interrupt(int irq, void *dev_id, 252static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
256 struct pt_regs *regs)
257{ 253{
258#ifdef CONFIG_X86_IO_APIC 254#ifdef CONFIG_X86_IO_APIC
259 if (timer_ack) { 255 if (timer_ack) {
@@ -307,7 +303,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
307 303
308 cur_timer->mark_offset(); 304 cur_timer->mark_offset();
309 305
310 do_timer_interrupt(irq, NULL, regs); 306 do_timer_interrupt(irq, regs);
311 307
312 write_sequnlock(&xtime_lock); 308 write_sequnlock(&xtime_lock);
313 return IRQ_HANDLED; 309 return IRQ_HANDLED;
@@ -348,7 +344,7 @@ static void sync_cmos_clock(unsigned long dummy)
348 * This code is run on a timer. If the clock is set, that timer 344 * This code is run on a timer. If the clock is set, that timer
349 * may not expire at the correct time. Thus, we adjust... 345 * may not expire at the correct time. Thus, we adjust...
350 */ 346 */
351 if ((time_status & STA_UNSYNC) != 0) 347 if (!ntp_synced())
352 /* 348 /*
353 * Not synced, exit, do not restart a timer (if one is 349 * Not synced, exit, do not restart a timer (if one is
354 * running, let it run out). 350 * running, let it run out).
@@ -383,6 +379,7 @@ void notify_arch_cmos_timer(void)
383 379
384static long clock_cmos_diff, sleep_start; 380static long clock_cmos_diff, sleep_start;
385 381
382static struct timer_opts *last_timer;
386static int timer_suspend(struct sys_device *dev, pm_message_t state) 383static int timer_suspend(struct sys_device *dev, pm_message_t state)
387{ 384{
388 /* 385 /*
@@ -391,6 +388,10 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
391 clock_cmos_diff = -get_cmos_time(); 388 clock_cmos_diff = -get_cmos_time();
392 clock_cmos_diff += get_seconds(); 389 clock_cmos_diff += get_seconds();
393 sleep_start = get_cmos_time(); 390 sleep_start = get_cmos_time();
391 last_timer = cur_timer;
392 cur_timer = &timer_none;
393 if (last_timer->suspend)
394 last_timer->suspend(state);
394 return 0; 395 return 0;
395} 396}
396 397
@@ -404,6 +405,7 @@ static int timer_resume(struct sys_device *dev)
404 if (is_hpet_enabled()) 405 if (is_hpet_enabled())
405 hpet_reenable(); 406 hpet_reenable();
406#endif 407#endif
408 setup_pit_timer();
407 sec = get_cmos_time() + clock_cmos_diff; 409 sec = get_cmos_time() + clock_cmos_diff;
408 sleep_length = (get_cmos_time() - sleep_start) * HZ; 410 sleep_length = (get_cmos_time() - sleep_start) * HZ;
409 write_seqlock_irqsave(&xtime_lock, flags); 411 write_seqlock_irqsave(&xtime_lock, flags);
@@ -412,6 +414,11 @@ static int timer_resume(struct sys_device *dev)
412 write_sequnlock_irqrestore(&xtime_lock, flags); 414 write_sequnlock_irqrestore(&xtime_lock, flags);
413 jiffies += sleep_length; 415 jiffies += sleep_length;
414 wall_jiffies += sleep_length; 416 wall_jiffies += sleep_length;
417 if (last_timer->resume)
418 last_timer->resume();
419 cur_timer = last_timer;
420 last_timer = NULL;
421 touch_softlockup_watchdog();
415 return 0; 422 return 0;
416} 423}
417 424
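ntp_clear() and ntp_synced() replace direct pokes at the NTP state; judging from the lines removed above, they condense to roughly the following (hedged sketch, not the header's exact text):

static inline void ntp_clear(void)
{
	time_adjust = 0;		/* stop active adjtime() */
	time_status |= STA_UNSYNC;
	time_maxerror = NTP_PHASE_LIMIT;
	time_esterror = NTP_PHASE_LIMIT;
}

#define ntp_synced()	(!(time_status & STA_UNSYNC))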
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index ef8dac5dd33b..d973a8b681fd 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -18,8 +18,8 @@
18#include "mach_timer.h" 18#include "mach_timer.h"
19#include <asm/hpet.h> 19#include <asm/hpet.h>
20 20
21static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */ 21static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
22static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ 22static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
23static unsigned long hpet_last; /* hpet counter value at last tick*/ 23static unsigned long hpet_last; /* hpet counter value at last tick*/
24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ 24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
25static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ 25static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
@@ -136,6 +136,8 @@ static void delay_hpet(unsigned long loops)
136 } while ((hpet_end - hpet_start) < (loops)); 136 } while ((hpet_end - hpet_start) < (loops));
137} 137}
138 138
139static struct timer_opts timer_hpet;
140
139static int __init init_hpet(char* override) 141static int __init init_hpet(char* override)
140{ 142{
141 unsigned long result, remain; 143 unsigned long result, remain;
@@ -163,6 +165,8 @@ static int __init init_hpet(char* override)
163 } 165 }
164 set_cyc2ns_scale(cpu_khz/1000); 166 set_cyc2ns_scale(cpu_khz/1000);
165 } 167 }
168 /* set this only when cpu_has_tsc */
169 timer_hpet.read_timer = read_timer_tsc;
166 } 170 }
167 171
168 /* 172 /*
@@ -177,6 +181,19 @@ static int __init init_hpet(char* override)
177 return 0; 181 return 0;
178} 182}
179 183
184static int hpet_resume(void)
185{
186 write_seqlock(&monotonic_lock);
187 /* Assume this is the last mark offset time */
188 rdtsc(last_tsc_low, last_tsc_high);
189
190 if (hpet_use_timer)
191 hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
192 else
193 hpet_last = hpet_readl(HPET_COUNTER);
194 write_sequnlock(&monotonic_lock);
195 return 0;
196}
180/************************************************************/ 197/************************************************************/
181 198
182/* tsc timer_opts struct */ 199/* tsc timer_opts struct */
@@ -186,7 +203,7 @@ static struct timer_opts timer_hpet __read_mostly = {
186 .get_offset = get_offset_hpet, 203 .get_offset = get_offset_hpet,
187 .monotonic_clock = monotonic_clock_hpet, 204 .monotonic_clock = monotonic_clock_hpet,
188 .delay = delay_hpet, 205 .delay = delay_hpet,
189 .read_timer = read_timer_tsc, 206 .resume = hpet_resume,
190}; 207};
191 208
192struct init_timer_opts __initdata timer_hpet_init = { 209struct init_timer_opts __initdata timer_hpet_init = {
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 06de036a820c..eddb64038234 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -175,30 +175,3 @@ void setup_pit_timer(void)
175 outb(LATCH >> 8 , PIT_CH0); /* MSB */ 175 outb(LATCH >> 8 , PIT_CH0); /* MSB */
176 spin_unlock_irqrestore(&i8253_lock, flags); 176 spin_unlock_irqrestore(&i8253_lock, flags);
177} 177}
178
179static int timer_resume(struct sys_device *dev)
180{
181 setup_pit_timer();
182 return 0;
183}
184
185static struct sysdev_class timer_sysclass = {
186 set_kset_name("timer_pit"),
187 .resume = timer_resume,
188};
189
190static struct sys_device device_timer = {
191 .id = 0,
192 .cls = &timer_sysclass,
193};
194
195static int __init init_timer_sysfs(void)
196{
197 int error = sysdev_class_register(&timer_sysclass);
198 if (!error)
199 error = sysdev_register(&device_timer);
200 return error;
201}
202
203device_initcall(init_timer_sysfs);
204
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index 4ef20e663498..264edaaac315 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -186,6 +186,14 @@ static void mark_offset_pmtmr(void)
186 } 186 }
187} 187}
188 188
189static int pmtmr_resume(void)
190{
191 write_seqlock(&monotonic_lock);
192 /* Assume this is the last mark offset time */
193 offset_tick = read_pmtmr();
194 write_sequnlock(&monotonic_lock);
195 return 0;
196}
189 197
190static unsigned long long monotonic_clock_pmtmr(void) 198static unsigned long long monotonic_clock_pmtmr(void)
191{ 199{
@@ -247,6 +255,7 @@ static struct timer_opts timer_pmtmr = {
247 .monotonic_clock = monotonic_clock_pmtmr, 255 .monotonic_clock = monotonic_clock_pmtmr,
248 .delay = delay_pmtmr, 256 .delay = delay_pmtmr,
249 .read_timer = read_timer_tsc, 257 .read_timer = read_timer_tsc,
258 .resume = pmtmr_resume,
250}; 259};
251 260
252struct init_timer_opts __initdata timer_pmtmr_init = { 261struct init_timer_opts __initdata timer_pmtmr_init = {
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 8f4e4d5bc560..6dd470cc9f72 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -543,6 +543,19 @@ static int __init init_tsc(char* override)
543 return -ENODEV; 543 return -ENODEV;
544} 544}
545 545
546static int tsc_resume(void)
547{
548 write_seqlock(&monotonic_lock);
549 /* Assume this is the last mark offset time */
550 rdtsc(last_tsc_low, last_tsc_high);
551#ifdef CONFIG_HPET_TIMER
552 if (is_hpet_enabled() && hpet_use_timer)
553 hpet_last = hpet_readl(HPET_COUNTER);
554#endif
555 write_sequnlock(&monotonic_lock);
556 return 0;
557}
558
546#ifndef CONFIG_X86_TSC 559#ifndef CONFIG_X86_TSC
547/* disable flag for tsc. Takes effect by clearing the TSC cpu flag 560/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
548 * in cpu/common.c */ 561 * in cpu/common.c */
@@ -573,6 +586,7 @@ static struct timer_opts timer_tsc = {
573 .monotonic_clock = monotonic_clock_tsc, 586 .monotonic_clock = monotonic_clock_tsc,
574 .delay = delay_tsc, 587 .delay = delay_tsc,
575 .read_timer = read_timer_tsc, 588 .read_timer = read_timer_tsc,
589 .resume = tsc_resume,
576}; 590};
577 591
578struct init_timer_opts __initdata timer_tsc_init = { 592struct init_timer_opts __initdata timer_tsc_init = {
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index cd2d5d5514fe..09a58cb6daa7 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -210,7 +210,7 @@ void show_registers(struct pt_regs *regs)
210 unsigned short ss; 210 unsigned short ss;
211 211
212 esp = (unsigned long) (&regs->esp); 212 esp = (unsigned long) (&regs->esp);
213 ss = __KERNEL_DS; 213 savesegment(ss, ss);
214 if (user_mode(regs)) { 214 if (user_mode(regs)) {
215 in_kernel = 0; 215 in_kernel = 0;
216 esp = regs->esp; 216 esp = regs->esp;
@@ -267,9 +267,6 @@ static void handle_BUG(struct pt_regs *regs)
267 char c; 267 char c;
268 unsigned long eip; 268 unsigned long eip;
269 269
270 if (user_mode(regs))
271 goto no_bug; /* Not in kernel */
272
273 eip = regs->eip; 270 eip = regs->eip;
274 271
275 if (eip < PAGE_OFFSET) 272 if (eip < PAGE_OFFSET)
@@ -366,8 +363,9 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e
366 die(str, regs, err); 363 die(str, regs, err);
367} 364}
368 365
369static void do_trap(int trapnr, int signr, char *str, int vm86, 366static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
370 struct pt_regs * regs, long error_code, siginfo_t *info) 367 struct pt_regs * regs, long error_code,
368 siginfo_t *info)
371{ 369{
372 struct task_struct *tsk = current; 370 struct task_struct *tsk = current;
373 tsk->thread.error_code = error_code; 371 tsk->thread.error_code = error_code;
@@ -463,7 +461,8 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
463DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 461DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
464DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) 462DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
465 463
466fastcall void do_general_protection(struct pt_regs * regs, long error_code) 464fastcall void __kprobes do_general_protection(struct pt_regs * regs,
465 long error_code)
467{ 466{
468 int cpu = get_cpu(); 467 int cpu = get_cpu();
469 struct tss_struct *tss = &per_cpu(init_tss, cpu); 468 struct tss_struct *tss = &per_cpu(init_tss, cpu);
@@ -568,6 +567,10 @@ static DEFINE_SPINLOCK(nmi_print_lock);
568 567
569void die_nmi (struct pt_regs *regs, const char *msg) 568void die_nmi (struct pt_regs *regs, const char *msg)
570{ 569{
570 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
571 NOTIFY_STOP)
572 return;
573
571 spin_lock(&nmi_print_lock); 574 spin_lock(&nmi_print_lock);
572 /* 575 /*
573 * We are in trouble anyway, lets at least try 576 * We are in trouble anyway, lets at least try
@@ -656,7 +659,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
656 659
657 ++nmi_count(cpu); 660 ++nmi_count(cpu);
658 661
659 if (!nmi_callback(regs, cpu)) 662 if (!rcu_dereference(nmi_callback)(regs, cpu))
660 default_do_nmi(regs); 663 default_do_nmi(regs);
661 664
662 nmi_exit(); 665 nmi_exit();
@@ -664,7 +667,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
664 667
665void set_nmi_callback(nmi_callback_t callback) 668void set_nmi_callback(nmi_callback_t callback)
666{ 669{
667 nmi_callback = callback; 670 rcu_assign_pointer(nmi_callback, callback);
668} 671}
669EXPORT_SYMBOL_GPL(set_nmi_callback); 672EXPORT_SYMBOL_GPL(set_nmi_callback);
670 673
@@ -675,7 +678,7 @@ void unset_nmi_callback(void)
675EXPORT_SYMBOL_GPL(unset_nmi_callback); 678EXPORT_SYMBOL_GPL(unset_nmi_callback);
676 679
677#ifdef CONFIG_KPROBES 680#ifdef CONFIG_KPROBES
678fastcall void do_int3(struct pt_regs *regs, long error_code) 681fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
679{ 682{
680 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 683 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
681 == NOTIFY_STOP) 684 == NOTIFY_STOP)
@@ -709,7 +712,7 @@ fastcall void do_int3(struct pt_regs *regs, long error_code)
709 * find every occurrence of the TF bit that could be saved away even 712 * find every occurrence of the TF bit that could be saved away even
710 * by user code) 713 * by user code)
711 */ 714 */
712fastcall void do_debug(struct pt_regs * regs, long error_code) 715fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
713{ 716{
714 unsigned int condition; 717 unsigned int condition;
715 struct task_struct *tsk = current; 718 struct task_struct *tsk = current;
@@ -1008,7 +1011,7 @@ void __init trap_init_f00f_bug(void)
1008 * it uses the read-only mapped virtual address. 1011 * it uses the read-only mapped virtual address.
1009 */ 1012 */
1010 idt_descr.address = fix_to_virt(FIX_F00F_IDT); 1013 idt_descr.address = fix_to_virt(FIX_F00F_IDT);
1011 __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); 1014 load_idt(&idt_descr);
1012} 1015}
1013#endif 1016#endif
1014 1017
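Reading nmi_callback through rcu_dereference() and publishing through rcu_assign_pointer() gives the lock-free NMI path a safe handoff: the assign orders the callback's initialization before the pointer store, and the dereference orders the load before any use through it. The pattern in isolation (hedged; dummy_nmi_callback stands for the default handler already in this file, and the function names are illustrative):

static nmi_callback_t nmi_callback = dummy_nmi_callback;

static void install_callback(nmi_callback_t cb)		/* writer */
{
	rcu_assign_pointer(nmi_callback, cb);
}

static void invoke_callback(struct pt_regs *regs, int cpu)	/* reader */
{
	if (!rcu_dereference(nmi_callback)(regs, cpu))
		default_do_nmi(regs);
}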
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index ec0f68ce6886..16b485009622 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -294,8 +294,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
294 */ 294 */
295 info->regs32->eax = 0; 295 info->regs32->eax = 0;
296 tsk->thread.saved_esp0 = tsk->thread.esp0; 296 tsk->thread.saved_esp0 = tsk->thread.esp0;
297 asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs)); 297 savesegment(fs, tsk->thread.saved_fs);
298 asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs)); 298 savesegment(gs, tsk->thread.saved_gs);
299 299
300 tss = &per_cpu(init_tss, get_cpu()); 300 tss = &per_cpu(init_tss, get_cpu());
301 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; 301 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -542,7 +542,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
542 unsigned char opcode; 542 unsigned char opcode;
543 unsigned char __user *csp; 543 unsigned char __user *csp;
544 unsigned char __user *ssp; 544 unsigned char __user *ssp;
545 unsigned short ip, sp; 545 unsigned short ip, sp, orig_flags;
546 int data32, pref_done; 546 int data32, pref_done;
547 547
548#define CHECK_IF_IN_TRAP \ 548#define CHECK_IF_IN_TRAP \
@@ -551,8 +551,12 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
551#define VM86_FAULT_RETURN do { \ 551#define VM86_FAULT_RETURN do { \
552 if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ 552 if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
553 return_to_32bit(regs, VM86_PICRETURN); \ 553 return_to_32bit(regs, VM86_PICRETURN); \
554 if (orig_flags & TF_MASK) \
555 handle_vm86_trap(regs, 0, 1); \
554 return; } while (0) 556 return; } while (0)
555 557
558 orig_flags = *(unsigned short *)&regs->eflags;
559
556 csp = (unsigned char __user *) (regs->cs << 4); 560 csp = (unsigned char __user *) (regs->cs << 4);
557 ssp = (unsigned char __user *) (regs->ss << 4); 561 ssp = (unsigned char __user *) (regs->ss << 4);
558 sp = SP(regs); 562 sp = SP(regs);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 761972f8cb6c..13b9c62cbbb4 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -22,6 +22,7 @@ SECTIONS
22 *(.text) 22 *(.text)
23 SCHED_TEXT 23 SCHED_TEXT
24 LOCK_TEXT 24 LOCK_TEXT
25 KPROBES_TEXT
25 *(.fixup) 26 *(.fixup)
26 *(.gnu.warning) 27 *(.gnu.warning)
27 } = 0x9090 28 } = 0x9090
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
index c8fcf75b9be3..68afa50dd7cf 100644
--- a/arch/i386/kernel/vsyscall-sigreturn.S
+++ b/arch/i386/kernel/vsyscall-sigreturn.S
@@ -15,7 +15,7 @@
15*/ 15*/
16 16
17 .text 17 .text
18 .org __kernel_vsyscall+32 18 .org __kernel_vsyscall+32,0x90
19 .globl __kernel_sigreturn 19 .globl __kernel_sigreturn
20 .type __kernel_sigreturn,@function 20 .type __kernel_sigreturn,@function
21__kernel_sigreturn: 21__kernel_sigreturn:
@@ -35,6 +35,7 @@ __kernel_rt_sigreturn:
35 int $0x80 35 int $0x80
36.LEND_rt_sigreturn: 36.LEND_rt_sigreturn:
37 .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn 37 .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
38 .balign 32
38 .previous 39 .previous
39 40
40 .section .eh_frame,"a",@progbits 41 .section .eh_frame,"a",@progbits