path: root/arch/i386/kernel
Diffstat (limited to 'arch/i386/kernel')
 arch/i386/kernel/Makefile                |   2
 arch/i386/kernel/acpi/boot.c             |   3
 arch/i386/kernel/cpu/common.c            |  16
 arch/i386/kernel/cpu/cpufreq/longhaul.c  |  12
 arch/i386/kernel/cpu/cyrix.c             |   6
 arch/i386/kernel/cpu/intel.c             |   9
 arch/i386/kernel/cpu/intel_cacheinfo.c   |   9
 arch/i386/kernel/cpu/mtrr/main.c         |   2
 arch/i386/kernel/crash.c                 |   2
 arch/i386/kernel/doublefault.c           |   2
 arch/i386/kernel/efi.c                   | 110
 arch/i386/kernel/entry.S                 |   9
 arch/i386/kernel/head.S                  |  48
 arch/i386/kernel/i8237.c                 |  67
 arch/i386/kernel/ioport.c                |   7
 arch/i386/kernel/ldt.c                   |   7
 arch/i386/kernel/machine_kexec.c         |  18
 arch/i386/kernel/microcode.c             |   7
 arch/i386/kernel/mpparse.c               |  17
 arch/i386/kernel/msr.c                   |  31
 arch/i386/kernel/nmi.c                   |   5
 arch/i386/kernel/process.c               |  43
 arch/i386/kernel/ptrace.c                |  63
 arch/i386/kernel/reboot.c                |   9
 arch/i386/kernel/semaphore.c             | 162
 arch/i386/kernel/setup.c                 |  22
 arch/i386/kernel/signal.c                |   8
 arch/i386/kernel/smp.c                   |   2
 arch/i386/kernel/smpboot.c               |   7
 arch/i386/kernel/time.c                  |  10
 arch/i386/kernel/timers/timer_hpet.c     |  19
 arch/i386/kernel/timers/timer_pit.c      |  27
 arch/i386/kernel/timers/timer_pm.c       |   9
 arch/i386/kernel/timers/timer_tsc.c      |  14
 arch/i386/kernel/traps.c                 |  11
 arch/i386/kernel/vm86.c                  |  10
 arch/i386/kernel/vsyscall-sigreturn.S    |   3
37 files changed, 400 insertions, 408 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 4cc83b322b36..64682a0edacf 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds
 obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		doublefault.o quirks.o
+		doublefault.o quirks.o i8237.o
 
 obj-y				+= cpu/
 obj-y				+= timers/
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index b7808a89d945..34ee500c26e5 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -833,6 +833,9 @@ acpi_process_madt(void)
 	if (!error) {
 		acpi_lapic = 1;
 
+#ifdef CONFIG_X86_GENERICARCH
+		generic_bigsmp_probe();
+#endif
 		/*
 		 * Parse MADT IO-APIC entries
 		 */
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 4553ffd94b1f..46ce9b248f55 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -613,8 +613,8 @@ void __devinit cpu_init(void)
 	memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
 	       GDT_ENTRY_TLS_ENTRIES * 8);
 
-	__asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu]));
-	__asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+	load_gdt(&cpu_gdt_descr[cpu]);
+	load_idt(&idt_descr);
 
 	/*
 	 * Delete NT
@@ -642,12 +642,12 @@ void __devinit cpu_init(void)
 	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
 
 	/* Clear all 6 debug registers: */
-
-#define CD(register) set_debugreg(0, register)
-
-	CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+	set_debugreg(0, 0);
+	set_debugreg(0, 1);
+	set_debugreg(0, 2);
+	set_debugreg(0, 3);
+	set_debugreg(0, 6);
+	set_debugreg(0, 7);
 
 	/*
 	 * Force FPU initialization:
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 04e3563da4fe..bf02b5026e62 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -64,8 +64,6 @@ static int dont_scale_voltage;
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
 
-#define __hlt()	__asm__ __volatile__("hlt": : :"memory")
-
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
@@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul,
 	outb(0xFE,0x21);	/* TMR0 only */
 	outb(0xFF,0x80);	/* delay */
 
-	local_irq_enable();
-
-	__hlt();
+	safe_halt();
 	wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
-	__hlt();
+	halt();
 
 	local_irq_disable();
 
@@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 		bcr2.bits.CLOCKMUL = clock_ratio_index;
 		local_irq_disable();
 		wrmsrl (MSR_VIA_BCR2, bcr2.val);
-		local_irq_enable();
-
-		__hlt();
+		safe_halt();
 
 		/* Disable software clock multiplier */
 		rdmsrl (MSR_VIA_BCR2, bcr2.val);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index ba4b01138c8f..ff87cc22b323 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void)
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
 	/* set 'Not Write-through' */
 	cr0 = 0x20000000;
-	__asm__("movl %%cr0,%%eax\n\t"
-		"orl %0,%%eax\n\t"
-		"movl %%eax,%%cr0\n"
-		: : "r" (cr0)
-		:"ax");
+	write_cr0(read_cr0() | cr0);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
 }
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index a2c33c1a46c5..43601de0f633 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -82,16 +82,13 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
  */
 static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
 {
-	unsigned int eax;
+	unsigned int eax, ebx, ecx, edx;
 
 	if (c->cpuid_level < 4)
 		return 1;
 
-	__asm__("cpuid"
-		: "=a" (eax)
-		: "0" (4), "c" (0)
-		: "bx", "dx");
-
+	/* Intel has a non-standard dependency on %ecx for this CPUID level. */
+	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
 	if (eax & 0x1f)
 		return ((eax >> 26) + 1);
 	else
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 6c55b50cf048..9e0d5f83cb9f 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -305,6 +305,9 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 {
 	struct _cpuid4_info	*this_leaf;
 	unsigned long num_threads_sharing;
+#ifdef CONFIG_X86_HT
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+#endif
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -314,10 +317,12 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 #ifdef CONFIG_X86_HT
 	else if (num_threads_sharing == smp_num_siblings)
 		this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
-#endif
+	else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
+		this_leaf->shared_cpu_map = cpu_core_map[cpu];
 	else
-		printk(KERN_INFO "Number of CPUs sharing cache didn't match "
+		printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
 				"any known set of CPUs\n");
+#endif
 }
 #else
 static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 764cac64e211..dd4ebd6af7e4 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -561,7 +561,7 @@ struct mtrr_value {
 
 static struct mtrr_value * mtrr_state;
 
-static int mtrr_save(struct sys_device * sysdev, u32 state)
+static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
 	int i;
 	int size = num_var_ranges * sizeof(struct mtrr_value);
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index e5fab12f7926..913be77bb844 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -153,7 +153,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
 	disable_local_APIC();
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
-	__asm__("hlt");
+	halt();
 	for(;;);
 
 	return 1;
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 789af3e9fb1f..5edb1d379add 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -20,7 +20,7 @@ static void doublefault_fn(void)
 	struct Xgt_desc_struct gdt_desc = {0, 0};
 	unsigned long gdt, tss;
 
-	__asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
+	store_gdt(&gdt_desc);
 	gdt = gdt_desc.address;
 
 	printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 385883ea8c19..ecad519fd395 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void)
 	 * directory. If I have PSE, I just need to duplicate one entry in
 	 * page directory.
 	 */
-	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+	cr4 = read_cr4();
 
 	if (cr4 & X86_CR4_PSE) {
 		efi_bak_pg_dir_pointer[0].pgd =
@@ -104,8 +104,7 @@ static void efi_call_phys_prelog(void)
 	local_flush_tlb();
 
 	cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
-	__asm__ __volatile__("lgdt %0":"=m"
-			(*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
+	load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]));
 }
 
 static void efi_call_phys_epilog(void)
@@ -114,8 +113,8 @@ static void efi_call_phys_epilog(void)
 
 	cpu_gdt_descr[0].address =
 		(unsigned long) __va(cpu_gdt_descr[0].address);
-	__asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
-	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+	load_gdt(&cpu_gdt_descr[0]);
+	cr4 = read_cr4();
 
 	if (cr4 & X86_CR4_PSE) {
 		swapper_pg_dir[pgd_index(0)].pgd =
@@ -233,22 +232,23 @@ void __init efi_map_memmap(void)
 {
 	memmap.map = NULL;
 
-	memmap.map = (efi_memory_desc_t *)
-		bt_ioremap((unsigned long) memmap.phys_map,
-			   (memmap.nr_map * sizeof(efi_memory_desc_t)));
-
+	memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
+			(memmap.nr_map * memmap.desc_size));
 	if (memmap.map == NULL)
 		printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
+
+	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
 }
 
 #if EFI_DEBUG
 static void __init print_efi_memmap(void)
 {
 	efi_memory_desc_t *md;
+	void *p;
 	int i;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
+		md = p;
 		printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
 			"range=[0x%016llx-0x%016llx) (%lluMB)\n",
 			i, md->type, md->attribute, md->phys_addr,
@@ -271,10 +271,10 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
 	} prev, curr;
 	efi_memory_desc_t *md;
 	unsigned long start, end;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 
 		if ((md->num_pages == 0) || (!is_available_memory(md)))
 			continue;
@@ -325,6 +325,7 @@ void __init efi_init(void)
 	memmap.phys_map = EFI_MEMMAP;
 	memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
 	memmap.desc_version = EFI_MEMDESC_VERSION;
+	memmap.desc_size = EFI_MEMDESC_SIZE;
 
 	efi.systab = (efi_system_table_t *)
 		boot_ioremap((unsigned long) efi_phys.systab,
@@ -428,22 +429,30 @@ void __init efi_init(void)
 		printk(KERN_ERR PFX "Could not map the runtime service table!\n");
 
 	/* Map the EFI memory map for use until paging_init() */
-
-	memmap.map = (efi_memory_desc_t *)
-		boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
-
+	memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
 	if (memmap.map == NULL)
 		printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
 
-	if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) {
-		printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't "
-		       "match the one from EFI!\n");
-	}
+	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
 #if EFI_DEBUG
 	print_efi_memmap();
 #endif
 }
 
+static inline void __init check_range_for_systab(efi_memory_desc_t *md)
+{
+	if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
+		((unsigned long)efi_phys.systab < md->phys_addr +
+		((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
+		unsigned long addr;
+
+		addr = md->virt_addr - md->phys_addr +
+			(unsigned long)efi_phys.systab;
+		efi.systab = (efi_system_table_t *)addr;
+	}
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
@@ -457,43 +466,32 @@ void __init efi_enter_virtual_mode(void)
 {
 	efi_memory_desc_t *md;
 	efi_status_t status;
-	int i;
+	void *p;
 
 	efi.systab = NULL;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 
-		if (md->attribute & EFI_MEMORY_RUNTIME) {
-			md->virt_addr =
-				(unsigned long)ioremap(md->phys_addr,
-					md->num_pages << EFI_PAGE_SHIFT);
-			if (!(unsigned long)md->virt_addr) {
-				printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
-					(unsigned long)md->phys_addr);
-			}
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
 
-			if (((unsigned long)md->phys_addr <=
-					(unsigned long)efi_phys.systab) &&
-					((unsigned long)efi_phys.systab <
-					md->phys_addr +
-					((unsigned long)md->num_pages <<
-					EFI_PAGE_SHIFT))) {
-				unsigned long addr;
-
-				addr = md->virt_addr - md->phys_addr +
-					(unsigned long)efi_phys.systab;
-				efi.systab = (efi_system_table_t *)addr;
-			}
-		}
+		md->virt_addr = (unsigned long)ioremap(md->phys_addr,
+				md->num_pages << EFI_PAGE_SHIFT);
+		if (!(unsigned long)md->virt_addr) {
+			printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
+				(unsigned long)md->phys_addr);
+		}
+		/* update the virtual address of the EFI system table */
+		check_range_for_systab(md);
 	}
 
 	if (!efi.systab)
 		BUG();
 
 	status = phys_efi_set_virtual_address_map(
-			sizeof(efi_memory_desc_t) * memmap.nr_map,
-			sizeof(efi_memory_desc_t),
+			memmap.desc_size * memmap.nr_map,
+			memmap.desc_size,
 			memmap.desc_version,
 			memmap.phys_map);
 
@@ -533,10 +531,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 {
 	struct resource *res;
 	efi_memory_desc_t *md;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 
 		if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
 		    0x100000000ULL)
@@ -613,10 +611,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 u32 efi_mem_type(unsigned long phys_addr)
 {
 	efi_memory_desc_t *md;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 		if ((md->phys_addr <= phys_addr) && (phys_addr <
 			(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
 			return md->type;
@@ -627,10 +625,10 @@ u32 efi_mem_type(unsigned long phys_addr)
 u64 efi_mem_attributes(unsigned long phys_addr)
 {
 	efi_memory_desc_t *md;
-	int i;
+	void *p;
 
-	for (i = 0; i < memmap.nr_map; i++) {
-		md = &memmap.map[i];
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		md = p;
 		if ((md->phys_addr <= phys_addr) && (phys_addr <
 			(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
 			return md->attribute;
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index a991d4e5edd2..abb909793efc 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -203,7 +203,7 @@ sysenter_past_esp:
 	GET_THREAD_INFO(%ebp)
 
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -226,9 +226,9 @@ ENTRY(system_call)
 	pushl %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
-					# system call tracing in operation
+					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -338,6 +338,9 @@ syscall_trace_entry:
 	movl %esp, %eax
 	xorl %edx,%edx
 	call do_syscall_trace
+	cmpl $0, %eax
+	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
+					# so must skip actual syscall
 	movl ORIG_EAX(%esp), %eax
 	cmpl $(nr_syscalls), %eax
 	jnae syscall_call
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 4477bb107098..0480ca9e9e57 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -77,6 +77,32 @@ ENTRY(startup_32)
 	subl %edi,%ecx
 	shrl $2,%ecx
 	rep ; stosl
+/*
+ * Copy bootup parameters out of the way.
+ * Note: %esi still has the pointer to the real-mode data.
+ * With the kexec as boot loader, parameter segment might be loaded beyond
+ * kernel image and might not even be addressable by early boot page tables.
+ * (kexec on panic case). Hence copy out the parameters before initializing
+ * page tables.
+ */
+	movl $(boot_params - __PAGE_OFFSET),%edi
+	movl $(PARAM_SIZE/4),%ecx
+	cld
+	rep
+	movsl
+	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+	andl %esi,%esi
+	jnz 2f			# New command line protocol
+	cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
+	jne 1f
+	movzwl OLD_CL_OFFSET,%esi
+	addl $(OLD_CL_BASE_ADDR),%esi
+2:
+	movl $(saved_command_line - __PAGE_OFFSET),%edi
+	movl $(COMMAND_LINE_SIZE/4),%ecx
+	rep
+	movsl
+1:
 
 /*
  * Initialize page tables. This creates a PDE and a set of page
@@ -214,28 +240,6 @@ ENTRY(startup_32_smp)
  */
 	call setup_idt
 
-/*
- * Copy bootup parameters out of the way.
- * Note: %esi still has the pointer to the real-mode data.
- */
-	movl $boot_params,%edi
-	movl $(PARAM_SIZE/4),%ecx
-	cld
-	rep
-	movsl
-	movl boot_params+NEW_CL_POINTER,%esi
-	andl %esi,%esi
-	jnz 2f			# New command line protocol
-	cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
-	jne 1f
-	movzwl OLD_CL_OFFSET,%esi
-	addl $(OLD_CL_BASE_ADDR),%esi
-2:
-	movl $saved_command_line,%edi
-	movl $(COMMAND_LINE_SIZE/4),%ecx
-	rep
-	movsl
-1:
 checkCPUtype:
 
 	movl $-1,X86_CPUID		# -1 for no CPUID initially
diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c
new file mode 100644
index 000000000000..c36d1c006c2f
--- /dev/null
+++ b/arch/i386/kernel/i8237.c
@@ -0,0 +1,67 @@
+/*
+ * i8237.c: 8237A DMA controller suspend functions.
+ *
+ * Written by Pierre Ossman, 2005.
+ */
+
+#include <linux/init.h>
+#include <linux/sysdev.h>
+
+#include <asm/dma.h>
+
+/*
+ * This module just handles suspend/resume issues with the
+ * 8237A DMA controller (used for ISA and LPC).
+ * Allocation is handled in kernel/dma.c and normal usage is
+ * in asm/dma.h.
+ */
+
+static int i8237A_resume(struct sys_device *dev)
+{
+	unsigned long flags;
+	int i;
+
+	flags = claim_dma_lock();
+
+	dma_outb(DMA1_RESET_REG, 0);
+	dma_outb(DMA2_RESET_REG, 0);
+
+	for (i = 0;i < 8;i++) {
+		set_dma_addr(i, 0x000000);
+		/* DMA count is a bit weird so this is not 0 */
+		set_dma_count(i, 1);
+	}
+
+	/* Enable cascade DMA or channel 0-3 won't work */
+	enable_dma(4);
+
+	release_dma_lock(flags);
+
+	return 0;
+}
+
+static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return 0;
+}
+
+static struct sysdev_class i8237_sysdev_class = {
+	set_kset_name("i8237"),
+	.suspend = i8237A_suspend,
+	.resume = i8237A_resume,
+};
+
+static struct sys_device device_i8237A = {
+	.id	= 0,
+	.cls	= &i8237_sysdev_class,
+};
+
+static int __init i8237A_init_sysfs(void)
+{
+	int error = sysdev_class_register(&i8237_sysdev_class);
+	if (!error)
+		error = sysdev_register(&device_i8237A);
+	return error;
+}
+
+device_initcall(i8237A_init_sysfs);
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 8b25160393c1..f2b37654777f 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
 	volatile struct pt_regs * regs = (struct pt_regs *) &unused;
 	unsigned int level = regs->ebx;
 	unsigned int old = (regs->eflags >> 12) & 3;
+	struct thread_struct *t = &current->thread;
 
 	if (level > 3)
 		return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-	regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
-	/* Make sure we return the long way (not sysenter) */
-	set_thread_flag(TIF_IRET);
+	t->iopl = level << 12;
+	regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
+	set_iopl_mask(t->iopl);
 	return 0;
 }
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index bb50afbee921..fe1ffa55587d 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -177,7 +177,7 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
 static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct * mm = current->mm;
-	__u32 entry_1, entry_2, *lp;
+	__u32 entry_1, entry_2;
 	int error;
 	struct user_desc ldt_info;
 
@@ -205,8 +205,6 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 		goto out_unlock;
 	}
 
-	lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
-
 	/* Allow LDTs to be cleared by the user. */
 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
 		if (oldmode || LDT_empty(&ldt_info)) {
@@ -223,8 +221,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
 
 	/* Install the new entry ...  */
 install:
-	*lp	= entry_1;
-	*(lp+1)	= entry_2;
+	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
 	error = 0;
 
 out_unlock:
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index cb699a2aa1f8..a912fed48482 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -17,13 +17,7 @@
 #include <asm/apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
-
-static inline unsigned long read_cr3(void)
-{
-	unsigned long cr3;
-	asm volatile("movl %%cr3,%0": "=r"(cr3));
-	return cr3;
-}
+#include <asm/system.h>
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 
@@ -99,10 +93,7 @@ static void set_idt(void *newidt, __u16 limit)
 	curidt.size    = limit;
 	curidt.address = (unsigned long)newidt;
 
-	__asm__ __volatile__ (
-		"lidtl %0\n"
-		: : "m" (curidt)
-		);
+	load_idt(&curidt);
 };
 
 
@@ -114,10 +105,7 @@ static void set_gdt(void *newgdt, __u16 limit)
 	curgdt.size    = limit;
 	curgdt.address = (unsigned long)newgdt;
 
-	__asm__ __volatile__ (
-		"lgdtl %0\n"
-		: : "m" (curgdt)
-		);
+	load_gdt(&curgdt);
 };
 
 static void load_segments(void)
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index a77c612aad00..165f13158c60 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -164,7 +164,8 @@ static void collect_cpu_info (void *unused)
 	}
 
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-	__asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+	/* see notes above for revision 1.07.  Apparent chip bug */
+	serialize_cpu();
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
 	pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
@@ -377,7 +378,9 @@ static void do_update_one (void * unused)
 		(unsigned long) uci->mc->bits >> 16 >> 16);
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 
-	__asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+	/* see notes above for revision 1.07.  Apparent chip bug */
+	serialize_cpu();
+
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index ce838abb27d8..5d0b9a8fc43d 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -65,6 +65,8 @@ int nr_ioapics;
 int pic_mode;
 unsigned long mp_lapic_addr;
 
+unsigned int def_to_bigsmp = 0;
+
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
 /* Internal processor count */
@@ -120,7 +122,7 @@ static int MP_valid_apicid(int apicid, int version)
 
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
-	int ver, apicid;
+	int ver, apicid, cpu, found_bsp = 0;
 	physid_mask_t tmp;
 
 	if (!(m->mpc_cpuflag & CPU_ENABLED))
@@ -179,6 +181,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
 		Dprintk("    Bootup CPU\n");
 		boot_cpu_physical_apicid = m->mpc_apicid;
+		found_bsp = 1;
 	}
 
 	if (num_processors >= NR_CPUS) {
@@ -202,6 +205,11 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 		return;
 	}
 
+	if (found_bsp)
+		cpu = 0;
+	else
+		cpu = num_processors - 1;
+	cpu_set(cpu, cpu_possible_map);
 	tmp = apicid_to_cpu_present(apicid);
 	physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
 
@@ -213,6 +221,13 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 		ver = 0x10;
 	}
 	apic_version[m->mpc_apicid] = ver;
+	if ((num_processors > 8) &&
+	    APIC_XAPIC(ver) &&
+	    (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
+		def_to_bigsmp = 1;
+	else
+		def_to_bigsmp = 0;
+
 	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
 }
 
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index b2f03c39a6fe..03100d6fc5d6 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -46,23 +46,13 @@
 
 static struct class *msr_class;
 
-/* Note: "err" is handled in a funny way below. Otherwise one version
-   of gcc or another breaks. */
-
 static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
 {
 	int err;
 
-	asm volatile ("1:	wrmsr\n"
-		      "2:\n"
-		      ".section .fixup,\"ax\"\n"
-		      "3:	movl %4,%0\n"
-		      "	jmp 2b\n"
-		      ".previous\n"
-		      ".section __ex_table,\"a\"\n"
-		      "	.align 4\n" "	.long 1b,3b\n" ".previous":"=&bDS" (err)
-		      :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
-
+	err = wrmsr_safe(reg, eax, edx);
+	if (err)
+		err = -EIO;
 	return err;
 }
 
@@ -70,18 +60,9 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
 {
 	int err;
 
-	asm volatile ("1:	rdmsr\n"
-		      "2:\n"
-		      ".section .fixup,\"ax\"\n"
-		      "3:	movl %4,%0\n"
-		      "	jmp 2b\n"
-		      ".previous\n"
-		      ".section __ex_table,\"a\"\n"
-		      "	.align 4\n"
-		      "	.long 1b,3b\n"
-		      ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
-		      :"c"(reg), "i"(-EIO), "0"(0));
-
+	err = rdmsr_safe(reg, eax, edx);
+	if (err)
+		err = -EIO;
 	return err;
 }
 
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 8c242bb1ef45..8bbdbda07a2d 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -501,8 +501,11 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 		 */
 		alert_counter[cpu]++;
 		if (alert_counter[cpu] == 5*nmi_hz)
+			/*
+			 * die_nmi will return ONLY if NOTIFY_STOP happens..
+			 */
 			die_nmi(regs, "NMI Watchdog detected LOCKUP");
-	} else {
+
 		last_irq_sums[cpu] = sum;
 		alert_counter[cpu] = 0;
 	}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index e3f362e8af5b..b45cbf93d439 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -164,7 +164,7 @@ static inline void play_dead(void)
 	 */
 	local_irq_disable();
 	while (1)
-		__asm__ __volatile__("hlt":::"memory");
+		halt();
 }
 #else
 static inline void play_dead(void)
@@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs)
 	printk(" DS: %04x ES: %04x\n",
 		0xffff & regs->xds,0xffff & regs->xes);
 
-	__asm__("movl %%cr0, %0": "=r" (cr0));
-	__asm__("movl %%cr2, %0": "=r" (cr2));
-	__asm__("movl %%cr3, %0": "=r" (cr3));
-	/* This could fault if %cr4 does not exist */
-	__asm__("1: movl %%cr4, %0		\n"
-		"2:				\n"
-		".section __ex_table,\"a\"	\n"
-		".long 1b,2b			\n"
-		".previous			\n"
-		: "=r" (cr4): "0" (0));
+	cr0 = read_cr0();
+	cr2 = read_cr2();
+	cr3 = read_cr3();
+	if (current_cpu_data.x86 > 4) {
+		cr4 = read_cr4();
+	}
 	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
 	show_trace(NULL, &regs->esp);
 }
@@ -682,21 +678,26 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	__unlazy_fpu(prev_p);
 
 	/*
-	 * Reload esp0, LDT and the page table pointer:
+	 * Reload esp0.
 	 */
 	load_esp0(tss, next);
 
 	/*
-	 * Load the per-thread Thread-Local Storage descriptor.
+	 * Save away %fs and %gs. No need to save %es and %ds, as
+	 * those are always kernel segments while inside the kernel.
+	 * Doing this before setting the new TLS descriptors avoids
+	 * the situation where we temporarily have non-reloadable
+	 * segments in %fs and %gs.  This could be an issue if the
+	 * NMI handler ever used %fs or %gs (it does not today), or
+	 * if the kernel is running inside of a hypervisor layer.
 	 */
-	load_TLS(next, cpu);
+	savesegment(fs, prev->fs);
+	savesegment(gs, prev->gs);
 
 	/*
-	 * Save away %fs and %gs. No need to save %es and %ds, as
-	 * those are always kernel segments while inside the kernel.
+	 * Load the per-thread Thread-Local Storage descriptor.
 	 */
-	asm volatile("mov %%fs,%0":"=m" (prev->fs));
-	asm volatile("mov %%gs,%0":"=m" (prev->gs));
+	load_TLS(next, cpu);
 
 	/*
 	 * Restore %fs and %gs if needed.
@@ -711,6 +712,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 		loadsegment(gs, next->gs);
 
 	/*
+	 * Restore IOPL if needed.
+	 */
+	if (unlikely(prev->iopl != next->iopl))
+		set_iopl_mask(next->iopl);
+
+	/*
 	 * Now maybe reload the debug registers
 	 */
 	if (unlikely(next->debugreg[7])) {
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 0da59b42843c..340980203b09 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -271,6 +271,8 @@ static void clear_singlestep(struct task_struct *child)
 void ptrace_disable(struct task_struct *child)
 { 
 	clear_singlestep(child);
+	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 }
 
 /*
@@ -509,15 +511,20 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 		}
 		break;
 
+	case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
 	case PTRACE_SYSCALL:	/* continue and stop at next (return from) syscall */
 	case PTRACE_CONT:	/* restart after signal. */
 		ret = -EIO;
 		if (!valid_signal(data))
 			break;
-		if (request == PTRACE_SYSCALL) {
+		if (request == PTRACE_SYSEMU) {
+			set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		} else if (request == PTRACE_SYSCALL) {
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}
-		else {
+			clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+		} else {
+			clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		}
 		child->exit_code = data;
@@ -542,10 +549,17 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 		wake_up_process(child);
 		break;
 
+	case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */
 	case PTRACE_SINGLESTEP:	/* set the trap flag. */
 		ret = -EIO;
 		if (!valid_signal(data))
 			break;
+
+		if (request == PTRACE_SYSEMU_SINGLESTEP)
+			set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+		else
+			clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		set_singlestep(child);
 		child->exit_code = data;
@@ -678,26 +692,52 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
  * - triggered by current->work.syscall_trace
  */
 __attribute__((regparm(3)))
-void do_syscall_trace(struct pt_regs *regs, int entryexit)
+int do_syscall_trace(struct pt_regs *regs, int entryexit)
 {
+	int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU), ret = 0;
+	/* With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
+	 * interception. */
+	int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
+
 	/* do the secure computing check first */
 	secure_computing(regs->orig_eax);
 
-	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+	if (unlikely(current->audit_context)) {
+		if (entryexit)
+			audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+		/* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
+		 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
+		 * not used, entry.S will call us only on syscall exit, not
+		 * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
+		 * calling send_sigtrap() on syscall entry.
+		 *
+		 * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
+		 * is_singlestep is false, despite his name, so we will still do
+		 * the correct thing.
+		 */
+		else if (is_singlestep)
+			goto out;
+	}
 
 	if (!(current->ptrace & PT_PTRACED))
 		goto out;
 
+	/* If a process stops on the 1st tracepoint with SYSCALL_TRACE
+	 * and then is resumed with SYSEMU_SINGLESTEP, it will come in
+	 * here. We have to check this and return */
+	if (is_sysemu && entryexit)
+		return 0;
+
 	/* Fake a debug trap */
-	if (test_thread_flag(TIF_SINGLESTEP))
+	if (is_singlestep)
 		send_sigtrap(current, regs, 0);
 
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+	if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
 		goto out;
 
 	/* the 0x80 provides a way for the tracing parent to distinguish
 	   between a syscall stop and SIGTRAP delivery */
+	/* Note that the debugger could change the result of test_thread_flag!*/
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
 
 	/*
@@ -709,9 +749,16 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit)
 		send_sig(current->exit_code, current, 1);
 		current->exit_code = 0;
 	}
+	ret = is_sysemu;
  out:
 	if (unlikely(current->audit_context) && !entryexit)
 		audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax,
 				    regs->ebx, regs->ecx, regs->edx, regs->esi);
+	if (ret == 0)
+		return 0;
 
+	regs->orig_eax = -1; /* force skip of syscall restarting */
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+	return 1;
 }
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index c71fef31dc47..1cbb9c0f4704 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -13,6 +13,7 @@
 #include <linux/dmi.h>
 #include <asm/uaccess.h>
 #include <asm/apic.h>
+#include <asm/desc.h>
 #include "mach_reboot.h"
 #include <linux/reboot_fixups.h>
 
@@ -242,13 +243,13 @@ void machine_real_restart(unsigned char *code, int length)
 
 	/* Set up the IDT for real mode. */
 
-	__asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));
+	load_idt(&real_mode_idt);
 
 	/* Set up a GDT from which we can load segment descriptors for real
 	   mode.  The GDT is not used in real mode; it is just needed here to
 	   prepare the descriptors. */
 
-	__asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));
+	load_gdt(&real_mode_gdt);
 
 	/* Load the data segment registers, and thus the descriptors ready for
 	   real mode.  The base address of each segment is 0x100, 16 times the
@@ -316,7 +317,7 @@ void machine_emergency_restart(void)
 	if (!reboot_thru_bios) {
 		if (efi_enabled) {
 			efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
-			__asm__ __volatile__("lidt %0": :"m" (no_idt));
+			load_idt(&no_idt);
 			__asm__ __volatile__("int3");
 		}
 		/* rebooting needs to touch the page at absolute addr 0 */
@@ -325,7 +326,7 @@ void machine_emergency_restart(void)
 			mach_reboot_fixups(); /* for board specific fixups */
 			mach_reboot();
 			/* That didn't work - force a triple fault.. */
-			__asm__ __volatile__("lidt %0": :"m" (no_idt));
+			load_idt(&no_idt);
 			__asm__ __volatile__("int3");
 		}
 	}
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
index 469f496e55c0..7455ab643943 100644
--- a/arch/i386/kernel/semaphore.c
+++ b/arch/i386/kernel/semaphore.c
@@ -13,171 +13,9 @@
  * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
  */
 #include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
 #include <asm/semaphore.h>
 
 /*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-static fastcall void __attribute_used__ __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-static fastcall void __attribute_used__ __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem)
-{
-	int sleepers;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}
-
-
-/*
  * The semaphore operations have a special calling sequence that
  * allow us to do a simpler in-line version of them. These routines
  * need to convert that sequence back into the C sequence when
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index af4de58cab54..294bcca985ab 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -370,12 +370,16 @@ static void __init limit_regions(unsigned long long size)
 	int i;
 
 	if (efi_enabled) {
-		for (i = 0; i < memmap.nr_map; i++) {
-			current_addr = memmap.map[i].phys_addr +
-				       (memmap.map[i].num_pages << 12);
-			if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
+		efi_memory_desc_t *md;
+		void *p;
+
+		for (p = memmap.map, i = 0; p < memmap.map_end;
+			p += memmap.desc_size, i++) {
+			md = p;
+			current_addr = md->phys_addr + (md->num_pages << 12);
+			if (md->type == EFI_CONVENTIONAL_MEMORY) {
 				if (current_addr >= size) {
-					memmap.map[i].num_pages -=
+					md->num_pages -=
 						(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
 					memmap.nr_map = i + 1;
 					return;
@@ -1581,8 +1585,14 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_table_init();
 	acpi_boot_init();
-#endif
 
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
+	if (def_to_bigsmp)
+		printk(KERN_WARNING "More than 8 CPUs detected and "
+			"CONFIG_X86_PC cannot handle it.\nUse "
+			"CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
+#endif
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (smp_found_config)
 		get_smp_config();
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 140e340569c6..61eb0c8a6e47 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -278,9 +278,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
 	int tmp, err = 0;
 
 	tmp = 0;
-	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(gs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(fs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
 
 	err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
@@ -604,7 +604,9 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
 	 * We want the common case to go fast, which
 	 * is why we may in certain cases get here from
 	 * kernel mode. Just return without doing anything
-	 * if so.
+	 * if so. vm86 regs switched out by assembly code
+	 * before reaching here, so testing against kernel
+	 * CS suffices.
 	 */
 	if (!user_mode(regs))
 		return 1;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index cec4bde67161..48b55db3680f 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy)
 	local_irq_disable();
 	disable_local_APIC();
 	if (cpu_data[smp_processor_id()].hlt_works_ok)
-		for(;;) __asm__("hlt");
+		for(;;) halt();
 	for (;;);
 }
 
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8ac8e9fd5614..5e4893d2b9f2 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -88,6 +88,8 @@ EXPORT_SYMBOL(cpu_online_map);
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
 /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
@@ -1017,8 +1019,8 @@ int __devinit smp_prepare_cpu(int cpu)
 	tsc_sync_disabled = 1;
 
 	/* init low mem mapping */
-	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+			KERNEL_PGD_PTRS);
 	flush_tlb_all();
 	schedule_work(&task);
 	wait_for_completion(&done);
@@ -1265,6 +1267,7 @@ void __devinit smp_prepare_boot_cpu(void)
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), cpu_callout_map);
 	cpu_set(smp_processor_id(), cpu_present_map);
+	cpu_set(smp_processor_id(), cpu_possible_map);
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 }
 
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 0ee9dee8af06..6f794a78ee1e 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -383,6 +383,7 @@ void notify_arch_cmos_timer(void)
 
 static long clock_cmos_diff, sleep_start;
 
+static struct timer_opts *last_timer;
 static int timer_suspend(struct sys_device *dev, pm_message_t state)
 {
 	/*
@@ -391,6 +392,10 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
 	clock_cmos_diff = -get_cmos_time();
 	clock_cmos_diff += get_seconds();
 	sleep_start = get_cmos_time();
+	last_timer = cur_timer;
+	cur_timer = &timer_none;
+	if (last_timer->suspend)
+		last_timer->suspend(state);
 	return 0;
 }
 
@@ -404,6 +409,7 @@ static int timer_resume(struct sys_device *dev)
 	if (is_hpet_enabled())
 		hpet_reenable();
 #endif
+	setup_pit_timer();
 	sec = get_cmos_time() + clock_cmos_diff;
 	sleep_length = (get_cmos_time() - sleep_start) * HZ;
 	write_seqlock_irqsave(&xtime_lock, flags);
@@ -412,6 +418,10 @@ static int timer_resume(struct sys_device *dev)
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	jiffies += sleep_length;
 	wall_jiffies += sleep_length;
+	if (last_timer->resume)
+		last_timer->resume();
+	cur_timer = last_timer;
+	last_timer = NULL;
 	return 0;
 }
 
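
The timer_suspend()/timer_resume() changes above park the active timekeeping backend across a sleep: the live timer is remembered in last_timer, cur_timer is pointed at timer_none while the machine is down, and on wake the saved backend gets its resume() hook before being reinstated. A compact sketch of that hand-off pattern with made-up backends follows; timer_backend, noop_backend, demo_suspend and demo_resume are illustrative names, not the kernel's timer_opts machinery.

#include <stdio.h>

/* Illustrative timer backend: a name plus optional suspend/resume hooks. */
struct timer_backend {
	const char *name;
	int (*suspend)(void);
	int (*resume)(void);
};

static int tsc_suspend(void) { printf("tsc: suspend\n"); return 0; }
static int tsc_resync(void)  { printf("tsc: resync counters\n"); return 0; }

static struct timer_backend tsc_backend  = { "tsc", tsc_suspend, tsc_resync };
static struct timer_backend noop_backend = { "none", NULL, NULL };

/* Mirrors the diff: remember the live backend, switch to a no-op one while
 * suspended, then restore it after its resume() hook has run. */
static struct timer_backend *cur_timer = &tsc_backend;
static struct timer_backend *last_timer;

static void demo_suspend(void)
{
	last_timer = cur_timer;
	cur_timer = &noop_backend;
	if (last_timer->suspend)
		last_timer->suspend();
}

static void demo_resume(void)
{
	if (last_timer->resume)
		last_timer->resume();
	cur_timer = last_timer;
	last_timer = NULL;
}

int main(void)
{
	printf("active: %s\n", cur_timer->name);
	demo_suspend();
	printf("while asleep: %s\n", cur_timer->name);
	demo_resume();
	printf("after resume: %s\n", cur_timer->name);
	return 0;
}

Swapping in the no-op backend first means anything that consults cur_timer while the box is suspended sees a harmless placeholder rather than a stopped clock, which appears to be the point of the cur_timer = &timer_none step in the patch.
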
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index ef8dac5dd33b..001de97c9e4a 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -136,6 +136,8 @@ static void delay_hpet(unsigned long loops)
 	} while ((hpet_end - hpet_start) < (loops));
 }
 
+static struct timer_opts timer_hpet;
+
 static int __init init_hpet(char* override)
 {
 	unsigned long result, remain;
@@ -163,6 +165,8 @@ static int __init init_hpet(char* override)
 			}
 			set_cyc2ns_scale(cpu_khz/1000);
 		}
+		/* set this only when cpu_has_tsc */
+		timer_hpet.read_timer = read_timer_tsc;
 	}
 
 	/*
@@ -177,6 +181,19 @@ static int __init init_hpet(char* override)
 	return 0;
 }
 
+static int hpet_resume(void)
+{
+	write_seqlock(&monotonic_lock);
+	/* Assume this is the last mark offset time */
+	rdtsc(last_tsc_low, last_tsc_high);
+
+	if (hpet_use_timer)
+		hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+	else
+		hpet_last = hpet_readl(HPET_COUNTER);
+	write_sequnlock(&monotonic_lock);
+	return 0;
+}
 /************************************************************/
 
 /* tsc timer_opts struct */
@@ -186,7 +203,7 @@ static struct timer_opts timer_hpet __read_mostly = {
 	.get_offset = get_offset_hpet,
 	.monotonic_clock = monotonic_clock_hpet,
 	.delay = delay_hpet,
-	.read_timer = read_timer_tsc,
+	.resume = hpet_resume,
 };
 
 struct init_timer_opts __initdata timer_hpet_init = {
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 06de036a820c..eddb64038234 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -175,30 +175,3 @@ void setup_pit_timer(void)
 	outb(LATCH >> 8 , PIT_CH0);	/* MSB */
 	spin_unlock_irqrestore(&i8253_lock, flags);
 }
-
-static int timer_resume(struct sys_device *dev)
-{
-	setup_pit_timer();
-	return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
-	set_kset_name("timer_pit"),
-	.resume = timer_resume,
-};
-
-static struct sys_device device_timer = {
-	.id = 0,
-	.cls = &timer_sysclass,
-};
-
-static int __init init_timer_sysfs(void)
-{
-	int error = sysdev_class_register(&timer_sysclass);
-	if (!error)
-		error = sysdev_register(&device_timer);
-	return error;
-}
-
-device_initcall(init_timer_sysfs);
-
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index 4ef20e663498..264edaaac315 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -186,6 +186,14 @@ static void mark_offset_pmtmr(void)
 	}
 }
 
+static int pmtmr_resume(void)
+{
+	write_seqlock(&monotonic_lock);
+	/* Assume this is the last mark offset time */
+	offset_tick = read_pmtmr();
+	write_sequnlock(&monotonic_lock);
+	return 0;
+}
 
 static unsigned long long monotonic_clock_pmtmr(void)
 {
@@ -247,6 +255,7 @@ static struct timer_opts timer_pmtmr = {
 	.monotonic_clock = monotonic_clock_pmtmr,
 	.delay = delay_pmtmr,
 	.read_timer = read_timer_tsc,
+	.resume = pmtmr_resume,
 };
 
 struct init_timer_opts __initdata timer_pmtmr_init = {
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 8f4e4d5bc560..6dd470cc9f72 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -543,6 +543,19 @@ static int __init init_tsc(char* override)
 	return -ENODEV;
 }
 
+static int tsc_resume(void)
+{
+	write_seqlock(&monotonic_lock);
+	/* Assume this is the last mark offset time */
+	rdtsc(last_tsc_low, last_tsc_high);
+#ifdef CONFIG_HPET_TIMER
+	if (is_hpet_enabled() && hpet_use_timer)
+		hpet_last = hpet_readl(HPET_COUNTER);
+#endif
+	write_sequnlock(&monotonic_lock);
+	return 0;
+}
+
 #ifndef CONFIG_X86_TSC
 /* disable flag for tsc. Takes effect by clearing the TSC cpu flag
  * in cpu/common.c */
@@ -573,6 +586,7 @@ static struct timer_opts timer_tsc = {
 	.monotonic_clock = monotonic_clock_tsc,
 	.delay = delay_tsc,
 	.read_timer = read_timer_tsc,
+	.resume = tsc_resume,
 };
 
 struct init_timer_opts __initdata timer_tsc_init = {
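
Each of the new resume hooks (hpet_resume, pmtmr_resume, tsc_resume) re-primes its last-mark-offset state under write_seqlock(&monotonic_lock), so a concurrent monotonic_clock reader either sees a consistent snapshot or retries. The sketch below shows the general seqlock retry discipline with C11 atomics; it is a simplified illustration (a real implementation also needs the memory barriers the kernel's seqlock primitives provide), and resume_snapshot/read_snapshot are invented names.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative seqlock discipline: the writer makes the sequence odd while it
 * updates the snapshot, readers retry if they saw an odd value or the value
 * changed under them.
 */
static atomic_uint seq;
static uint64_t last_count;		/* "last mark offset" style snapshot */

static void resume_snapshot(uint64_t now)	/* what a resume hook does */
{
	atomic_fetch_add(&seq, 1);	/* odd: update in progress */
	last_count = now;
	atomic_fetch_add(&seq, 1);	/* even: update complete */
}

static uint64_t read_snapshot(void)
{
	unsigned int start;
	uint64_t val;

	do {
		start = atomic_load(&seq);
		val = last_count;
	} while ((start & 1) || atomic_load(&seq) != start);

	return val;
}

int main(void)
{
	resume_snapshot(12345);
	printf("snapshot after resume: %llu\n", (unsigned long long)read_snapshot());
	return 0;
}
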
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index cd2d5d5514fe..54629bb5893a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -210,7 +210,7 @@ void show_registers(struct pt_regs *regs)
 	unsigned short ss;
 
 	esp = (unsigned long) (&regs->esp);
-	ss = __KERNEL_DS;
+	savesegment(ss, ss);
 	if (user_mode(regs)) {
 		in_kernel = 0;
 		esp = regs->esp;
@@ -267,9 +267,6 @@ static void handle_BUG(struct pt_regs *regs)
 	char c;
 	unsigned long eip;
 
-	if (user_mode(regs))
-		goto no_bug;		/* Not in kernel */
-
 	eip = regs->eip;
 
 	if (eip < PAGE_OFFSET)
@@ -568,6 +565,10 @@ static DEFINE_SPINLOCK(nmi_print_lock);
 
 void die_nmi (struct pt_regs *regs, const char *msg)
 {
+	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
+			NOTIFY_STOP)
+		return;
+
 	spin_lock(&nmi_print_lock);
 	/*
 	* We are in trouble anyway, lets at least try
@@ -1008,7 +1009,7 @@ void __init trap_init_f00f_bug(void)
 	 * it uses the read-only mapped virtual address.
 	 */
 	idt_descr.address = fix_to_virt(FIX_F00F_IDT);
-	__asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+	load_idt(&idt_descr);
 }
 #endif
 
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index ec0f68ce6886..16b485009622 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -294,8 +294,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	 */
 	info->regs32->eax = 0;
 	tsk->thread.saved_esp0 = tsk->thread.esp0;
-	asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs));
-	asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs));
+	savesegment(fs, tsk->thread.saved_fs);
+	savesegment(gs, tsk->thread.saved_gs);
 
 	tss = &per_cpu(init_tss, get_cpu());
 	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -542,7 +542,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
 	unsigned char opcode;
 	unsigned char __user *csp;
 	unsigned char __user *ssp;
-	unsigned short ip, sp;
+	unsigned short ip, sp, orig_flags;
 	int data32, pref_done;
 
 #define CHECK_IF_IN_TRAP \
@@ -551,8 +551,12 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
 #define VM86_FAULT_RETURN do { \
 	if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
 		return_to_32bit(regs, VM86_PICRETURN); \
+	if (orig_flags & TF_MASK) \
+		handle_vm86_trap(regs, 0, 1); \
 	return; } while (0)
 
+	orig_flags = *(unsigned short *)&regs->eflags;
+
 	csp = (unsigned char __user *) (regs->cs << 4);
 	ssp = (unsigned char __user *) (regs->ss << 4);
 	sp = SP(regs);
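
The handle_vm86_fault() change above snapshots the guest's flags into orig_flags before any instruction is emulated, and every VM86_FAULT_RETURN now raises a debug trap when the trap flag was set, so single-stepping a vm86 task also steps over instructions the kernel emulates. A tiny sketch of that capture-then-trap shape; emulate_one_insn and report_single_step are hypothetical stand-ins for the emulation and trap-delivery paths.

#include <stdio.h>

#define DEMO_TF_MASK 0x0100	/* x86 trap flag bit in EFLAGS */

/* Hypothetical stand-ins for the emulator and the trap delivery. */
static void emulate_one_insn(unsigned long *eflags)
{
	/* e.g. a CLI/STI/PUSHF handler would go here; it may rewrite eflags */
	(void)eflags;
}

static void report_single_step(void)
{
	printf("debug trap: one emulated instruction completed\n");
}

static void handle_fault(unsigned long *eflags)
{
	/* Snapshot the flags before emulation, as the diff does with
	 * orig_flags, because the emulated instruction may change them. */
	unsigned long orig_flags = *eflags;

	emulate_one_insn(eflags);

	if (orig_flags & DEMO_TF_MASK)
		report_single_step();
}

int main(void)
{
	unsigned long eflags = DEMO_TF_MASK;	/* debugger set TF before the fault */

	handle_fault(&eflags);
	return 0;
}
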
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
index c8fcf75b9be3..68afa50dd7cf 100644
--- a/arch/i386/kernel/vsyscall-sigreturn.S
+++ b/arch/i386/kernel/vsyscall-sigreturn.S
@@ -15,7 +15,7 @@
 */
 
 	.text
-	.org __kernel_vsyscall+32
+	.org __kernel_vsyscall+32,0x90
 	.globl __kernel_sigreturn
 	.type __kernel_sigreturn,@function
 __kernel_sigreturn:
@@ -35,6 +35,7 @@ __kernel_rt_sigreturn:
 	int $0x80
 .LEND_rt_sigreturn:
 	.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+	.balign 32
 	.previous
 
 	.section .eh_frame,"a",@progbits