Diffstat (limited to 'arch')
-rw-r--r-- arch/Kconfig | 3
-rw-r--r-- arch/alpha/Kconfig | 1
-rw-r--r-- arch/alpha/include/asm/perf_event.h | 5
-rw-r--r-- arch/alpha/kernel/perf_event.c | 128
-rw-r--r-- arch/alpha/kernel/time.c | 30
-rw-r--r-- arch/arm/Kconfig | 15
-rw-r--r-- arch/arm/include/asm/hw_irq.h | 2
-rw-r--r-- arch/arm/include/asm/perf_event.h | 12
-rw-r--r-- arch/arm/kernel/irq.c | 10
-rw-r--r-- arch/arm/kernel/kprobes-decode.c | 7
-rw-r--r-- arch/arm/kernel/perf_event.c | 212
-rw-r--r-- arch/arm/mach-at91/include/mach/system.h | 7
-rw-r--r-- arch/arm/mach-bcmring/dma.c | 4
-rw-r--r-- arch/arm/mach-bcmring/irq.c | 6
-rw-r--r-- arch/arm/mach-ep93xx/dma-m2p.c | 2
-rw-r--r-- arch/arm/mach-imx/Kconfig | 1
-rw-r--r-- arch/arm/mach-imx/mach-cpuimx27.c | 2
-rw-r--r-- arch/arm/mach-iop13xx/msi.c | 8
-rw-r--r-- arch/arm/mach-lpc32xx/phy3250.c | 7
-rw-r--r-- arch/arm/mach-s5p6440/cpu.c | 1
-rw-r--r-- arch/arm/mach-s5p6442/cpu.c | 1
-rw-r--r-- arch/arm/mach-s5pc100/cpu.c | 1
-rw-r--r-- arch/arm/mach-s5pv210/clock.c | 5
-rw-r--r-- arch/arm/mach-s5pv210/cpu.c | 1
-rw-r--r-- arch/arm/mach-u300/dummyspichip.c | 5
-rw-r--r-- arch/arm/mach-u300/spi.c | 10
-rw-r--r-- arch/arm/mach-ux500/board-mop500.c | 8
-rw-r--r-- arch/arm/mach-vexpress/ct-ca9x4.c | 4
-rw-r--r-- arch/arm/mach-vexpress/v2m.c | 2
-rw-r--r-- arch/arm/mm/ioremap.c | 8
-rw-r--r-- arch/arm/mm/mmu.c | 4
-rw-r--r-- arch/arm/mm/proc-v7.S | 10
-rw-r--r-- arch/arm/oprofile/Makefile | 4
-rw-r--r-- arch/arm/oprofile/common.c | 311
-rw-r--r-- arch/arm/plat-omap/iommu.c | 1
-rw-r--r-- arch/arm/plat-samsung/adc.c | 1
-rw-r--r-- arch/arm/plat-samsung/clock.c | 27
-rw-r--r-- arch/arm/plat-samsung/include/plat/s3c64xx-spi.h | 3
-rw-r--r-- arch/blackfin/include/asm/bfin5xx_spi.h | 81
-rw-r--r-- arch/frv/Kconfig | 1
-rw-r--r-- arch/frv/lib/Makefile | 2
-rw-r--r-- arch/frv/lib/perf_event.c | 19
-rw-r--r-- arch/ia64/Kconfig | 12
-rw-r--r-- arch/ia64/include/asm/compat.h | 208
-rw-r--r-- arch/ia64/include/asm/hardirq.h | 11
-rw-r--r-- arch/ia64/include/asm/iommu_table.h | 6
-rw-r--r-- arch/ia64/include/asm/system.h | 4
-rw-r--r-- arch/ia64/kernel/Makefile | 1
-rw-r--r-- arch/ia64/kernel/cyclone.c | 2
-rw-r--r-- arch/ia64/kernel/iosapic.c | 60
-rw-r--r-- arch/ia64/kernel/irq_ia64.c | 5
-rw-r--r-- arch/ia64/kernel/mca.c | 38
-rw-r--r-- arch/ia64/kernel/msi_ia64.c | 8
-rw-r--r-- arch/ia64/kernel/palinfo.c | 2
-rw-r--r-- arch/ia64/kernel/perfmon.c | 12
-rw-r--r-- arch/ia64/kernel/salinfo.c | 2
-rw-r--r-- arch/ia64/kernel/setup.c | 4
-rw-r--r-- arch/ia64/kernel/stacktrace.c | 39
-rw-r--r-- arch/ia64/kernel/unwind.c | 23
-rw-r--r-- arch/ia64/sn/kernel/msi_sn.c | 4
-rw-r--r-- arch/ia64/xen/xen_pv_ops.c | 5
-rw-r--r-- arch/m32r/include/asm/elf.h | 4
-rw-r--r-- arch/m32r/kernel/.gitignore | 1
-rw-r--r-- arch/m32r/kernel/irq.c | 2
-rw-r--r-- arch/m32r/kernel/signal.c | 4
-rw-r--r-- arch/m32r/platforms/m32104ut/setup.c | 2
-rw-r--r-- arch/m32r/platforms/m32700ut/setup.c | 8
-rw-r--r-- arch/m32r/platforms/mappi/setup.c | 2
-rw-r--r-- arch/m32r/platforms/mappi2/setup.c | 2
-rw-r--r-- arch/m32r/platforms/mappi3/setup.c | 2
-rw-r--r-- arch/m32r/platforms/oaks32r/setup.c | 2
-rw-r--r-- arch/m32r/platforms/opsput/setup.c | 6
-rw-r--r-- arch/m32r/platforms/usrv/setup.c | 4
-rw-r--r-- arch/mips/Kbuild | 4
-rw-r--r-- arch/mips/Kconfig | 4
-rw-r--r-- arch/mips/boot/compressed/Makefile | 2
-rw-r--r-- arch/mips/dec/Platform | 2
-rw-r--r-- arch/mips/include/asm/fcntl.h | 1
-rw-r--r-- arch/mips/include/asm/siginfo.h | 1
-rw-r--r-- arch/mips/jz4740/Platform | 2
-rw-r--r-- arch/mips/kernel/branch.c | 1
-rw-r--r-- arch/mips/kernel/mips-mt-fpaff.c | 2
-rw-r--r-- arch/mips/kernel/ptrace.c | 4
-rw-r--r-- arch/mips/kernel/scall32-o32.S | 11
-rw-r--r-- arch/mips/kernel/scall64-64.S | 7
-rw-r--r-- arch/mips/kernel/scall64-n32.S | 12
-rw-r--r-- arch/mips/kernel/scall64-o32.S | 15
-rw-r--r-- arch/mips/kernel/signal.c | 45
-rw-r--r-- arch/mips/kernel/signal_n32.c | 5
-rw-r--r-- arch/mips/kernel/unaligned.c | 2
-rw-r--r-- arch/parisc/Kconfig | 1
-rw-r--r-- arch/parisc/include/asm/perf_event.h | 3
-rw-r--r-- arch/powerpc/Kconfig | 1
-rw-r--r-- arch/powerpc/boot/dts/mpc8536ds.dts | 52
-rw-r--r-- arch/powerpc/boot/dts/p4080ds.dts | 11
-rw-r--r-- arch/powerpc/include/asm/paca.h | 2
-rw-r--r-- arch/powerpc/include/asm/system.h | 4
-rw-r--r-- arch/powerpc/kernel/perf_callchain.c | 86
-rw-r--r-- arch/powerpc/kernel/perf_event.c | 166
-rw-r--r-- arch/powerpc/kernel/perf_event_fsl_emb.c | 148
-rw-r--r-- arch/powerpc/kernel/time.c | 42
-rw-r--r-- arch/powerpc/platforms/cell/axon_msi.c | 6
-rw-r--r-- arch/powerpc/platforms/pseries/xics.c | 2
-rw-r--r-- arch/powerpc/sysdev/fsl_msi.c | 4
-rw-r--r-- arch/powerpc/sysdev/mpic_pasemi_msi.c | 22
-rw-r--r-- arch/powerpc/sysdev/mpic_u3msi.c | 18
-rw-r--r-- arch/s390/Kconfig | 8
-rw-r--r-- arch/s390/include/asm/hardirq.h | 4
-rw-r--r-- arch/s390/include/asm/perf_event.h | 3
-rw-r--r-- arch/s390/include/asm/system.h | 1
-rw-r--r-- arch/s390/include/asm/topology.h | 27
-rw-r--r-- arch/s390/kernel/topology.c | 150
-rw-r--r-- arch/sh/Kconfig | 14
-rw-r--r-- arch/sh/include/asm/perf_event.h | 7
-rw-r--r-- arch/sh/kernel/irq.c | 2
-rw-r--r-- arch/sh/kernel/perf_callchain.c | 50
-rw-r--r-- arch/sh/kernel/perf_event.c | 159
-rw-r--r-- arch/sh/oprofile/Makefile | 4
-rw-r--r-- arch/sh/oprofile/common.c | 115
-rw-r--r-- arch/sh/oprofile/op_impl.h | 33
-rw-r--r-- arch/sparc/Kconfig | 3
-rw-r--r-- arch/sparc/include/asm/jump_label.h | 32
-rw-r--r-- arch/sparc/include/asm/perf_event.h | 4
-rw-r--r-- arch/sparc/kernel/Makefile | 2
-rw-r--r-- arch/sparc/kernel/jump_label.c | 47
-rw-r--r-- arch/sparc/kernel/module.c | 6
-rw-r--r-- arch/sparc/kernel/pci_msi.c | 8
-rw-r--r-- arch/sparc/kernel/pcr.c | 8
-rw-r--r-- arch/sparc/kernel/perf_event.c | 240
-rw-r--r-- arch/tile/kernel/irq.c | 4
-rw-r--r-- arch/um/drivers/hostaudio_kern.c | 14
-rw-r--r-- arch/um/drivers/ubd_kern.c | 9
-rw-r--r-- arch/um/kernel/irq.c | 6
-rw-r--r-- arch/x86/Kconfig | 130
-rw-r--r-- arch/x86/Kconfig.debug | 4
-rw-r--r-- arch/x86/Makefile | 8
-rw-r--r-- arch/x86/ia32/ia32_aout.c | 22
-rw-r--r-- arch/x86/include/asm/alternative.h | 11
-rw-r--r-- arch/x86/include/asm/amd_iommu.h | 6
-rw-r--r-- arch/x86/include/asm/amd_iommu_proto.h | 2
-rw-r--r-- arch/x86/include/asm/amd_iommu_types.h | 23
-rw-r--r-- arch/x86/include/asm/amd_nb.h (renamed from arch/x86/include/asm/k8.h) | 21
-rw-r--r-- arch/x86/include/asm/apb_timer.h | 1
-rw-r--r-- arch/x86/include/asm/apic.h | 4
-rw-r--r-- arch/x86/include/asm/apicdef.h | 1
-rw-r--r-- arch/x86/include/asm/calgary.h | 4
-rw-r--r-- arch/x86/include/asm/cpu.h | 1
-rw-r--r-- arch/x86/include/asm/cpufeature.h | 13
-rw-r--r-- arch/x86/include/asm/dwarf2.h | 20
-rw-r--r-- arch/x86/include/asm/entry_arch.h | 4
-rw-r--r-- arch/x86/include/asm/fixmap.h | 15
-rw-r--r-- arch/x86/include/asm/gart.h | 20
-rw-r--r-- arch/x86/include/asm/hardirq.h | 2
-rw-r--r-- arch/x86/include/asm/hpet.h | 10
-rw-r--r-- arch/x86/include/asm/hw_irq.h | 19
-rw-r--r-- arch/x86/include/asm/i387.h | 185
-rw-r--r-- arch/x86/include/asm/i8259.h | 2
-rw-r--r-- arch/x86/include/asm/io.h | 1
-rw-r--r-- arch/x86/include/asm/io_apic.h | 6
-rw-r--r-- arch/x86/include/asm/iommu_table.h | 100
-rw-r--r-- arch/x86/include/asm/irq_remapping.h | 35
-rw-r--r-- arch/x86/include/asm/irq_vectors.h | 4
-rw-r--r-- arch/x86/include/asm/jump_label.h | 37
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 24
-rw-r--r-- arch/x86/include/asm/mrst.h | 10
-rw-r--r-- arch/x86/include/asm/mwait.h | 15
-rw-r--r-- arch/x86/include/asm/olpc_ofw.h | 4
-rw-r--r-- arch/x86/include/asm/page_types.h | 2
-rw-r--r-- arch/x86/include/asm/paravirt.h | 5
-rw-r--r-- arch/x86/include/asm/paravirt_types.h | 1
-rw-r--r-- arch/x86/include/asm/perf_event_p4.h | 52
-rw-r--r-- arch/x86/include/asm/pgtable.h | 4
-rw-r--r-- arch/x86/include/asm/pgtable_64.h | 2
-rw-r--r-- arch/x86/include/asm/processor.h | 29
-rw-r--r-- arch/x86/include/asm/setup.h | 5
-rw-r--r-- arch/x86/include/asm/swiotlb.h | 13
-rw-r--r-- arch/x86/include/asm/vmi.h | 269
-rw-r--r-- arch/x86/include/asm/vmi_time.h | 98
-rw-r--r-- arch/x86/kernel/Makefile | 10
-rw-r--r-- arch/x86/kernel/acpi/cstate.c | 11
-rw-r--r-- arch/x86/kernel/alternative.c | 71
-rw-r--r-- arch/x86/kernel/amd_iommu.c | 2
-rw-r--r-- arch/x86/kernel/amd_iommu_init.c | 139
-rw-r--r-- arch/x86/kernel/amd_nb.c (renamed from arch/x86/kernel/k8.c) | 56
-rw-r--r-- arch/x86/kernel/apb_timer.c | 60
-rw-r--r-- arch/x86/kernel/aperture_64.c | 31
-rw-r--r-- arch/x86/kernel/apic/apic.c | 91
-rw-r--r-- arch/x86/kernel/apic/io_apic.c | 882
-rw-r--r-- arch/x86/kernel/apic/nmi.c | 2
-rw-r--r-- arch/x86/kernel/apic/probe_64.c | 3
-rw-r--r-- arch/x86/kernel/cpu/amd.c | 77
-rw-r--r-- arch/x86/kernel/cpu/common.c | 24
-rw-r--r-- arch/x86/kernel/cpu/cpu.h | 1
-rw-r--r-- arch/x86/kernel/cpu/intel.c | 2
-rw-r--r-- arch/x86/kernel/cpu/intel_cacheinfo.c | 14
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 36
-rw-r--r-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 5
-rw-r--r-- arch/x86/kernel/cpu/mtrr/cleanup.c | 2
-rw-r--r-- arch/x86/kernel/cpu/mtrr/generic.c | 128
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 280
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd.c | 4
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 8
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 13
-rw-r--r-- arch/x86/kernel/cpu/perf_event_p4.c | 292
-rw-r--r-- arch/x86/kernel/cpu/perfctr-watchdog.c | 9
-rw-r--r-- arch/x86/kernel/cpu/scattered.c | 6
-rw-r--r-- arch/x86/kernel/crash_dump_64.c | 3
-rw-r--r-- arch/x86/kernel/early-quirks.c | 2
-rw-r--r-- arch/x86/kernel/early_printk.c | 13
-rw-r--r-- arch/x86/kernel/early_printk_mrst.c | 319
-rw-r--r-- arch/x86/kernel/entry_32.S | 310
-rw-r--r-- arch/x86/kernel/entry_64.S | 114
-rw-r--r-- arch/x86/kernel/ftrace.c | 63
-rw-r--r-- arch/x86/kernel/hpet.c | 16
-rw-r--r-- arch/x86/kernel/i387.c | 58
-rw-r--r-- arch/x86/kernel/i8259.c | 63
-rw-r--r-- arch/x86/kernel/irq.c | 32
-rw-r--r-- arch/x86/kernel/irq_work.c | 30
-rw-r--r-- arch/x86/kernel/irqinit.c | 23
-rw-r--r-- arch/x86/kernel/jump_label.c | 50
-rw-r--r-- arch/x86/kernel/kprobes.c | 14
-rw-r--r-- arch/x86/kernel/machine_kexec_64.c | 4
-rw-r--r-- arch/x86/kernel/module.c | 3
-rw-r--r-- arch/x86/kernel/olpc-xo1.c | 140
-rw-r--r-- arch/x86/kernel/olpc.c | 89
-rw-r--r-- arch/x86/kernel/olpc_ofw.c | 6
-rw-r--r-- arch/x86/kernel/paravirt.c | 1
-rw-r--r-- arch/x86/kernel/pci-calgary_64.c | 18
-rw-r--r-- arch/x86/kernel/pci-dma.c | 44
-rw-r--r-- arch/x86/kernel/pci-gart_64.c | 33
-rw-r--r-- arch/x86/kernel/pci-iommu_table.c | 89
-rw-r--r-- arch/x86/kernel/pci-swiotlb.c | 44
-rw-r--r-- arch/x86/kernel/pmtimer_64.c | 69
-rw-r--r-- arch/x86/kernel/process_64.c | 2
-rw-r--r-- arch/x86/kernel/reboot.c | 2
-rw-r--r-- arch/x86/kernel/setup.c | 127
-rw-r--r-- arch/x86/kernel/setup_percpu.c | 2
-rw-r--r-- arch/x86/kernel/sfi.c | 4
-rw-r--r-- arch/x86/kernel/smpboot.c | 118
-rw-r--r-- arch/x86/kernel/sys_i386_32.c | 4
-rw-r--r-- arch/x86/kernel/traps.c | 35
-rw-r--r-- arch/x86/kernel/tsc.c | 66
-rw-r--r-- arch/x86/kernel/uv_irq.c | 55
-rw-r--r-- arch/x86/kernel/visws_quirks.c | 140
-rw-r--r-- arch/x86/kernel/vmi_32.c | 893
-rw-r--r-- arch/x86/kernel/vmiclock_32.c | 317
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S | 28
-rw-r--r-- arch/x86/kvm/lapic.c | 3
-rw-r--r-- arch/x86/kvm/svm.c | 17
-rw-r--r-- arch/x86/kvm/vmx.c | 24
-rw-r--r-- arch/x86/kvm/x86.c | 7
-rw-r--r-- arch/x86/lguest/boot.c | 18
-rw-r--r-- arch/x86/lib/memcpy_32.c | 199
-rw-r--r-- arch/x86/lib/memcpy_64.S | 158
-rw-r--r-- arch/x86/lib/memmove_64.c | 189
-rw-r--r-- arch/x86/mm/fault.c | 47
-rw-r--r-- arch/x86/mm/init_32.c | 4
-rw-r--r-- arch/x86/mm/init_64.c | 49
-rw-r--r-- arch/x86/mm/k8topology_64.c | 8
-rw-r--r-- arch/x86/mm/kmemcheck/kmemcheck.c | 2
-rw-r--r-- arch/x86/mm/kmemcheck/opcode.c | 2
-rw-r--r-- arch/x86/mm/numa_64.c | 2
-rw-r--r-- arch/x86/mm/pgtable.c | 24
-rw-r--r-- arch/x86/mm/srat_64.c | 8
-rw-r--r-- arch/x86/mm/tlb.c | 48
-rw-r--r-- arch/x86/oprofile/backtrace.c | 70
-rw-r--r-- arch/x86/oprofile/nmi_int.c | 9
-rw-r--r-- arch/x86/oprofile/op_model_amd.c | 145
-rw-r--r-- arch/x86/pci/olpc.c | 2
-rw-r--r-- arch/x86/xen/mmu.c | 1
-rw-r--r-- arch/x86/xen/pci-swiotlb-xen.c | 5
-rw-r--r-- arch/xtensa/kernel/irq.c | 2
272 files changed, 5349 insertions(+), 5745 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index fe48fc7a3eba..53d7f619a1b9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
158 subsystem. Also has support for calculating CPU cycle events 158 subsystem. Also has support for calculating CPU cycle events
159 to determine how many clock cycles in a given period. 159 to determine how many clock cycles in a given period.
160 160
161config HAVE_ARCH_JUMP_LABEL
162 bool
163
161source "kernel/gcov/Kconfig" 164source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index b9647bb66d13..d04ccd73af45 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -9,6 +9,7 @@ config ALPHA
9 select HAVE_IDE 9 select HAVE_IDE
10 select HAVE_OPROFILE 10 select HAVE_OPROFILE
11 select HAVE_SYSCALL_WRAPPERS 11 select HAVE_SYSCALL_WRAPPERS
12 select HAVE_IRQ_WORK
12 select HAVE_PERF_EVENTS 13 select HAVE_PERF_EVENTS
13 select HAVE_DMA_ATTRS 14 select HAVE_DMA_ATTRS
14 help 15 help
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index 4157cd3c44a9..fe792ca818f6 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,11 +1,6 @@
1#ifndef __ASM_ALPHA_PERF_EVENT_H 1#ifndef __ASM_ALPHA_PERF_EVENT_H
2#define __ASM_ALPHA_PERF_EVENT_H 2#define __ASM_ALPHA_PERF_EVENT_H
3 3
4/* Alpha only supports software events through this interface. */
5extern void set_perf_event_pending(void);
6
7#define PERF_EVENT_INDEX_OFFSET 0
8
9#ifdef CONFIG_PERF_EVENTS 4#ifdef CONFIG_PERF_EVENTS
10extern void init_hw_perf_events(void); 5extern void init_hw_perf_events(void);
11#else 6#else
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 85d8e4f58c83..1cc49683fb69 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -307,7 +307,7 @@ again:
307 new_raw_count) != prev_raw_count) 307 new_raw_count) != prev_raw_count)
308 goto again; 308 goto again;
309 309
310 delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; 310 delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
311 311
312 /* It is possible on very rare occasions that the PMC has overflowed 312 /* It is possible on very rare occasions that the PMC has overflowed
313 * but the interrupt is yet to come. Detect and fix this situation. 313 * but the interrupt is yet to come. Detect and fix this situation.
@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
402 struct hw_perf_event *hwc = &pe->hw; 402 struct hw_perf_event *hwc = &pe->hw;
403 int idx = hwc->idx; 403 int idx = hwc->idx;
404 404
405 if (cpuc->current_idx[j] != PMC_NO_INDEX) { 405 if (cpuc->current_idx[j] == PMC_NO_INDEX) {
406 cpuc->idx_mask |= (1<<cpuc->current_idx[j]); 406 alpha_perf_event_set_period(pe, hwc, idx);
407 continue; 407 cpuc->current_idx[j] = idx;
408 } 408 }
409 409
410 alpha_perf_event_set_period(pe, hwc, idx); 410 if (!(hwc->state & PERF_HES_STOPPED))
411 cpuc->current_idx[j] = idx; 411 cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
412 cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
413 } 412 }
414 cpuc->config = cpuc->event[0]->hw.config_base; 413 cpuc->config = cpuc->event[0]->hw.config_base;
415} 414}
@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
420 * - this function is called from outside this module via the pmu struct 419 * - this function is called from outside this module via the pmu struct
421 * returned from perf event initialisation. 420 * returned from perf event initialisation.
422 */ 421 */
423static int alpha_pmu_enable(struct perf_event *event) 422static int alpha_pmu_add(struct perf_event *event, int flags)
424{ 423{
425 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 424 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
425 struct hw_perf_event *hwc = &event->hw;
426 int n0; 426 int n0;
427 int ret; 427 int ret;
428 unsigned long flags; 428 unsigned long irq_flags;
429 429
430 /* 430 /*
431 * The Sparc code has the IRQ disable first followed by the perf 431 * The Sparc code has the IRQ disable first followed by the perf
@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
435 * nevertheless we disable the PMCs first to enable a potential 435 * nevertheless we disable the PMCs first to enable a potential
436 * final PMI to occur before we disable interrupts. 436 * final PMI to occur before we disable interrupts.
437 */ 437 */
438 perf_disable(); 438 perf_pmu_disable(event->pmu);
439 local_irq_save(flags); 439 local_irq_save(irq_flags);
440 440
441 /* Default to error to be returned */ 441 /* Default to error to be returned */
442 ret = -EAGAIN; 442 ret = -EAGAIN;
@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
455 } 455 }
456 } 456 }
457 457
458 local_irq_restore(flags); 458 hwc->state = PERF_HES_UPTODATE;
459 perf_enable(); 459 if (!(flags & PERF_EF_START))
460 hwc->state |= PERF_HES_STOPPED;
461
462 local_irq_restore(irq_flags);
463 perf_pmu_enable(event->pmu);
460 464
461 return ret; 465 return ret;
462} 466}
@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
467 * - this function is called from outside this module via the pmu struct 471 * - this function is called from outside this module via the pmu struct
468 * returned from perf event initialisation. 472 * returned from perf event initialisation.
469 */ 473 */
470static void alpha_pmu_disable(struct perf_event *event) 474static void alpha_pmu_del(struct perf_event *event, int flags)
471{ 475{
472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 476 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
473 struct hw_perf_event *hwc = &event->hw; 477 struct hw_perf_event *hwc = &event->hw;
474 unsigned long flags; 478 unsigned long irq_flags;
475 int j; 479 int j;
476 480
477 perf_disable(); 481 perf_pmu_disable(event->pmu);
478 local_irq_save(flags); 482 local_irq_save(irq_flags);
479 483
480 for (j = 0; j < cpuc->n_events; j++) { 484 for (j = 0; j < cpuc->n_events; j++) {
481 if (event == cpuc->event[j]) { 485 if (event == cpuc->event[j]) {
@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
501 } 505 }
502 } 506 }
503 507
504 local_irq_restore(flags); 508 local_irq_restore(irq_flags);
505 perf_enable(); 509 perf_pmu_enable(event->pmu);
506} 510}
507 511
508 512
@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event)
514} 518}
515 519
516 520
517static void alpha_pmu_unthrottle(struct perf_event *event) 521static void alpha_pmu_stop(struct perf_event *event, int flags)
522{
523 struct hw_perf_event *hwc = &event->hw;
524 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
525
526 if (!(hwc->state & PERF_HES_STOPPED)) {
527 cpuc->idx_mask &= ~(1UL<<hwc->idx);
528 hwc->state |= PERF_HES_STOPPED;
529 }
530
531 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
532 alpha_perf_event_update(event, hwc, hwc->idx, 0);
533 hwc->state |= PERF_HES_UPTODATE;
534 }
535
536 if (cpuc->enabled)
537 wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
538}
539
540
541static void alpha_pmu_start(struct perf_event *event, int flags)
518{ 542{
519 struct hw_perf_event *hwc = &event->hw; 543 struct hw_perf_event *hwc = &event->hw;
520 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 544 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
521 545
546 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
547 return;
548
549 if (flags & PERF_EF_RELOAD) {
550 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
551 alpha_perf_event_set_period(event, hwc, hwc->idx);
552 }
553
554 hwc->state = 0;
555
522 cpuc->idx_mask |= 1UL<<hwc->idx; 556 cpuc->idx_mask |= 1UL<<hwc->idx;
523 wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); 557 if (cpuc->enabled)
558 wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
524} 559}
525 560
526 561
@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
642 return 0; 677 return 0;
643} 678}
644 679
645static const struct pmu pmu = {
646 .enable = alpha_pmu_enable,
647 .disable = alpha_pmu_disable,
648 .read = alpha_pmu_read,
649 .unthrottle = alpha_pmu_unthrottle,
650};
651
652
653/* 680/*
654 * Main entry point to initialise a HW performance event. 681 * Main entry point to initialise a HW performance event.
655 */ 682 */
656const struct pmu *hw_perf_event_init(struct perf_event *event) 683static int alpha_pmu_event_init(struct perf_event *event)
657{ 684{
658 int err; 685 int err;
659 686
687 switch (event->attr.type) {
688 case PERF_TYPE_RAW:
689 case PERF_TYPE_HARDWARE:
690 case PERF_TYPE_HW_CACHE:
691 break;
692
693 default:
694 return -ENOENT;
695 }
696
660 if (!alpha_pmu) 697 if (!alpha_pmu)
661 return ERR_PTR(-ENODEV); 698 return -ENODEV;
662 699
663 /* Do the real initialisation work. */ 700 /* Do the real initialisation work. */
664 err = __hw_perf_event_init(event); 701 err = __hw_perf_event_init(event);
665 702
666 if (err) 703 return err;
667 return ERR_PTR(err);
668
669 return &pmu;
670} 704}
671 705
672
673
674/* 706/*
675 * Main entry point - enable HW performance counters. 707 * Main entry point - enable HW performance counters.
676 */ 708 */
677void hw_perf_enable(void) 709static void alpha_pmu_enable(struct pmu *pmu)
678{ 710{
679 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 711 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
680 712
@@ -700,7 +732,7 @@ void hw_perf_enable(void)
700 * Main entry point - disable HW performance counters. 732 * Main entry point - disable HW performance counters.
701 */ 733 */
702 734
703void hw_perf_disable(void) 735static void alpha_pmu_disable(struct pmu *pmu)
704{ 736{
705 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 737 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
706 738
@@ -713,6 +745,17 @@ void hw_perf_disable(void)
713 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); 745 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
714} 746}
715 747
748static struct pmu pmu = {
749 .pmu_enable = alpha_pmu_enable,
750 .pmu_disable = alpha_pmu_disable,
751 .event_init = alpha_pmu_event_init,
752 .add = alpha_pmu_add,
753 .del = alpha_pmu_del,
754 .start = alpha_pmu_start,
755 .stop = alpha_pmu_stop,
756 .read = alpha_pmu_read,
757};
758
716 759
717/* 760/*
718 * Main entry point - don't know when this is called but it 761 * Main entry point - don't know when this is called but it
@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
766 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); 809 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
767 810
768 /* la_ptr is the counter that overflowed. */ 811 /* la_ptr is the counter that overflowed. */
769 if (unlikely(la_ptr >= perf_max_events)) { 812 if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
770 /* This should never occur! */ 813 /* This should never occur! */
771 irq_err_count++; 814 irq_err_count++;
772 pr_warning("PMI: silly index %ld\n", la_ptr); 815 pr_warning("PMI: silly index %ld\n", la_ptr);
@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
807 /* Interrupts coming too quickly; "throttle" the 850 /* Interrupts coming too quickly; "throttle" the
808 * counter, i.e., disable it for a little while. 851 * counter, i.e., disable it for a little while.
809 */ 852 */
810 cpuc->idx_mask &= ~(1UL<<idx); 853 alpha_pmu_stop(event, 0);
811 } 854 }
812 } 855 }
813 wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); 856 wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
837 880
838 /* And set up PMU specification */ 881 /* And set up PMU specification */
839 alpha_pmu = &ev67_pmu; 882 alpha_pmu = &ev67_pmu;
840 perf_max_events = alpha_pmu->num_pmcs; 883
884 perf_pmu_register(&pmu);
841} 885}
842 886
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 396af1799ea4..0f1d8493cfca 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -41,7 +41,7 @@
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/bcd.h> 42#include <linux/bcd.h>
43#include <linux/profile.h> 43#include <linux/profile.h>
44#include <linux/perf_event.h> 44#include <linux/irq_work.h>
45 45
46#include <asm/uaccess.h> 46#include <asm/uaccess.h>
47#include <asm/io.h> 47#include <asm/io.h>
@@ -83,25 +83,25 @@ static struct {
83 83
84unsigned long est_cycle_freq; 84unsigned long est_cycle_freq;
85 85
86#ifdef CONFIG_PERF_EVENTS 86#ifdef CONFIG_IRQ_WORK
87 87
88DEFINE_PER_CPU(u8, perf_event_pending); 88DEFINE_PER_CPU(u8, irq_work_pending);
89 89
90#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 90#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
91#define test_perf_event_pending() __get_cpu_var(perf_event_pending) 91#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
92#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 92#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
93 93
94void set_perf_event_pending(void) 94void set_irq_work_pending(void)
95{ 95{
96 set_perf_event_pending_flag(); 96 set_irq_work_pending_flag();
97} 97}
98 98
99#else /* CONFIG_PERF_EVENTS */ 99#else /* CONFIG_IRQ_WORK */
100 100
101#define test_perf_event_pending() 0 101#define test_irq_work_pending() 0
102#define clear_perf_event_pending() 102#define clear_irq_work_pending()
103 103
104#endif /* CONFIG_PERF_EVENTS */ 104#endif /* CONFIG_IRQ_WORK */
105 105
106 106
107static inline __u32 rpcc(void) 107static inline __u32 rpcc(void)
@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev)
191 191
192 write_sequnlock(&xtime_lock); 192 write_sequnlock(&xtime_lock);
193 193
194 if (test_perf_event_pending()) { 194 if (test_irq_work_pending()) {
195 clear_perf_event_pending(); 195 clear_irq_work_pending();
196 perf_event_do_pending(); 196 irq_work_run();
197 } 197 }
198 198
199#ifndef CONFIG_SMP 199#ifndef CONFIG_SMP
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 88c97bc7a6f5..9103904b3dab 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -23,6 +23,7 @@ config ARM
23 select HAVE_KERNEL_GZIP 23 select HAVE_KERNEL_GZIP
24 select HAVE_KERNEL_LZO 24 select HAVE_KERNEL_LZO
25 select HAVE_KERNEL_LZMA 25 select HAVE_KERNEL_LZMA
26 select HAVE_IRQ_WORK
26 select HAVE_PERF_EVENTS 27 select HAVE_PERF_EVENTS
27 select PERF_USE_VMALLOC 28 select PERF_USE_VMALLOC
28 select HAVE_REGS_AND_STACK_ACCESS_API 29 select HAVE_REGS_AND_STACK_ACCESS_API
@@ -1101,6 +1102,20 @@ config ARM_ERRATA_720789
1101 invalidated are not, resulting in an incoherency in the system page 1102 invalidated are not, resulting in an incoherency in the system page
1102 tables. The workaround changes the TLB flushing routines to invalidate 1103 tables. The workaround changes the TLB flushing routines to invalidate
1103 entries regardless of the ASID. 1104 entries regardless of the ASID.
1105
1106config ARM_ERRATA_743622
1107 bool "ARM errata: Faulty hazard checking in the Store Buffer may lead to data corruption"
1108 depends on CPU_V7
1109 help
1110 This option enables the workaround for the 743622 Cortex-A9
1111 (r2p0..r2p2) erratum. Under very rare conditions, a faulty
1112 optimisation in the Cortex-A9 Store Buffer may lead to data
1113 corruption. This workaround sets a specific bit in the diagnostic
1114 register of the Cortex-A9 which disables the Store Buffer
1115 optimisation, preventing the defect from occurring. This has no
1116 visible impact on the overall performance or power consumption of the
1117 processor.
1118
1104endmenu 1119endmenu
1105 1120
1106source "arch/arm/common/Kconfig" 1121source "arch/arm/common/Kconfig"
diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
index 90831f6f5f5c..5586b7c8ef6f 100644
--- a/arch/arm/include/asm/hw_irq.h
+++ b/arch/arm/include/asm/hw_irq.h
@@ -24,4 +24,6 @@ void set_irq_flags(unsigned int irq, unsigned int flags);
24#define IRQF_PROBE (1 << 1) 24#define IRQF_PROBE (1 << 1)
25#define IRQF_NOAUTOEN (1 << 2) 25#define IRQF_NOAUTOEN (1 << 2)
26 26
27#define ARCH_IRQ_INIT_FLAGS (IRQ_NOREQUEST | IRQ_NOPROBE)
28
27#endif 29#endif
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index b5799a3b7117..c4aa4e8c6af9 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,18 +12,6 @@
12#ifndef __ARM_PERF_EVENT_H__ 12#ifndef __ARM_PERF_EVENT_H__
13#define __ARM_PERF_EVENT_H__ 13#define __ARM_PERF_EVENT_H__
14 14
15/*
16 * NOP: on *most* (read: all supported) ARM platforms, the performance
17 * counter interrupts are regular interrupts and not an NMI. This
18 * means that when we receive the interrupt we can call
19 * perf_event_do_pending() that handles all of the work with
20 * interrupts disabled.
21 */
22static inline void
23set_perf_event_pending(void)
24{
25}
26
27/* ARM performance counters start from 1 (in the cp15 accesses) so use the 15/* ARM performance counters start from 1 (in the cp15 accesses) so use the
28 * same indexes here for consistency. */ 16 * same indexes here for consistency. */
29#define PERF_EVENT_INDEX_OFFSET 1 17#define PERF_EVENT_INDEX_OFFSET 1
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index c0d5c3b3a760..36ad3be4692a 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -154,14 +154,6 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
154 154
155void __init init_IRQ(void) 155void __init init_IRQ(void)
156{ 156{
157 struct irq_desc *desc;
158 int irq;
159
160 for (irq = 0; irq < nr_irqs; irq++) {
161 desc = irq_to_desc_alloc_node(irq, 0);
162 desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
163 }
164
165 init_arch_irq(); 157 init_arch_irq();
166} 158}
167 159
@@ -169,7 +161,7 @@ void __init init_IRQ(void)
169int __init arch_probe_nr_irqs(void) 161int __init arch_probe_nr_irqs(void)
170{ 162{
171 nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS; 163 nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS;
172 return 0; 164 return nr_irqs;
173} 165}
174#endif 166#endif
175 167
diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 8bccbfa693ff..2c1f0050c9c4 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1162,11 +1162,12 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
1162{ 1162{
1163 /* 1163 /*
1164 * MSR : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx 1164 * MSR : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx
1165 * Undef : cccc 0011 0x00 xxxx xxxx xxxx xxxx xxxx 1165 * Undef : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx
1166 * ALU op with S bit and Rd == 15 : 1166 * ALU op with S bit and Rd == 15 :
1167 * cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx 1167 * cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx
1168 */ 1168 */
1169 if ((insn & 0x0f900000) == 0x03200000 || /* MSR & Undef */ 1169 if ((insn & 0x0fb00000) == 0x03200000 || /* MSR */
1170 (insn & 0x0ff00000) == 0x03400000 || /* Undef */
1170 (insn & 0x0e10f000) == 0x0210f000) /* ALU s-bit, R15 */ 1171 (insn & 0x0e10f000) == 0x0210f000) /* ALU s-bit, R15 */
1171 return INSN_REJECTED; 1172 return INSN_REJECTED;
1172 1173
@@ -1177,7 +1178,7 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
1177 * *S (bit 20) updates condition codes 1178 * *S (bit 20) updates condition codes
1178 * ADC/SBC/RSC reads the C flag 1179 * ADC/SBC/RSC reads the C flag
1179 */ 1180 */
1180 insn &= 0xfff00fff; /* Rn = r0, Rd = r0 */ 1181 insn &= 0xffff0fff; /* Rd = r0 */
1181 asi->insn[0] = insn; 1182 asi->insn[0] = insn;
1182 asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ 1183 asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */
1183 emulate_alu_imm_rwflags : emulate_alu_imm_rflags; 1184 emulate_alu_imm_rwflags : emulate_alu_imm_rflags;
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ecbb0288e5dd..49643b1467e6 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -123,6 +123,12 @@ armpmu_get_max_events(void)
123} 123}
124EXPORT_SYMBOL_GPL(armpmu_get_max_events); 124EXPORT_SYMBOL_GPL(armpmu_get_max_events);
125 125
126int perf_num_counters(void)
127{
128 return armpmu_get_max_events();
129}
130EXPORT_SYMBOL_GPL(perf_num_counters);
131
126#define HW_OP_UNSUPPORTED 0xFFFF 132#define HW_OP_UNSUPPORTED 0xFFFF
127 133
128#define C(_x) \ 134#define C(_x) \
@@ -221,46 +227,56 @@ again:
221} 227}
222 228
223static void 229static void
224armpmu_disable(struct perf_event *event) 230armpmu_read(struct perf_event *event)
225{ 231{
226 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
227 struct hw_perf_event *hwc = &event->hw; 232 struct hw_perf_event *hwc = &event->hw;
228 int idx = hwc->idx;
229
230 WARN_ON(idx < 0);
231
232 clear_bit(idx, cpuc->active_mask);
233 armpmu->disable(hwc, idx);
234
235 barrier();
236 233
237 armpmu_event_update(event, hwc, idx); 234 /* Don't read disabled counters! */
238 cpuc->events[idx] = NULL; 235 if (hwc->idx < 0)
239 clear_bit(idx, cpuc->used_mask); 236 return;
240 237
241 perf_event_update_userpage(event); 238 armpmu_event_update(event, hwc, hwc->idx);
242} 239}
243 240
244static void 241static void
245armpmu_read(struct perf_event *event) 242armpmu_stop(struct perf_event *event, int flags)
246{ 243{
247 struct hw_perf_event *hwc = &event->hw; 244 struct hw_perf_event *hwc = &event->hw;
248 245
249 /* Don't read disabled counters! */ 246 if (!armpmu)
250 if (hwc->idx < 0)
251 return; 247 return;
252 248
253 armpmu_event_update(event, hwc, hwc->idx); 249 /*
250 * ARM pmu always has to update the counter, so ignore
251 * PERF_EF_UPDATE, see comments in armpmu_start().
252 */
253 if (!(hwc->state & PERF_HES_STOPPED)) {
254 armpmu->disable(hwc, hwc->idx);
255 barrier(); /* why? */
256 armpmu_event_update(event, hwc, hwc->idx);
257 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
258 }
254} 259}
255 260
256static void 261static void
257armpmu_unthrottle(struct perf_event *event) 262armpmu_start(struct perf_event *event, int flags)
258{ 263{
259 struct hw_perf_event *hwc = &event->hw; 264 struct hw_perf_event *hwc = &event->hw;
260 265
266 if (!armpmu)
267 return;
268
269 /*
270 * ARM pmu always has to reprogram the period, so ignore
271 * PERF_EF_RELOAD, see the comment below.
272 */
273 if (flags & PERF_EF_RELOAD)
274 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
275
276 hwc->state = 0;
261 /* 277 /*
262 * Set the period again. Some counters can't be stopped, so when we 278 * Set the period again. Some counters can't be stopped, so when we
263 * were throttled we simply disabled the IRQ source and the counter 279 * were stopped we simply disabled the IRQ source and the counter
264 * may have been left counting. If we don't do this step then we may 280 * may have been left counting. If we don't do this step then we may
265 * get an interrupt too soon or *way* too late if the overflow has 281 * get an interrupt too soon or *way* too late if the overflow has
266 * happened since disabling. 282 * happened since disabling.
@@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event)
269 armpmu->enable(hwc, hwc->idx); 285 armpmu->enable(hwc, hwc->idx);
270} 286}
271 287
288static void
289armpmu_del(struct perf_event *event, int flags)
290{
291 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
292 struct hw_perf_event *hwc = &event->hw;
293 int idx = hwc->idx;
294
295 WARN_ON(idx < 0);
296
297 clear_bit(idx, cpuc->active_mask);
298 armpmu_stop(event, PERF_EF_UPDATE);
299 cpuc->events[idx] = NULL;
300 clear_bit(idx, cpuc->used_mask);
301
302 perf_event_update_userpage(event);
303}
304
272static int 305static int
273armpmu_enable(struct perf_event *event) 306armpmu_add(struct perf_event *event, int flags)
274{ 307{
275 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 308 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
276 struct hw_perf_event *hwc = &event->hw; 309 struct hw_perf_event *hwc = &event->hw;
277 int idx; 310 int idx;
278 int err = 0; 311 int err = 0;
279 312
313 perf_pmu_disable(event->pmu);
314
280 /* If we don't have a space for the counter then finish early. */ 315 /* If we don't have a space for the counter then finish early. */
281 idx = armpmu->get_event_idx(cpuc, hwc); 316 idx = armpmu->get_event_idx(cpuc, hwc);
282 if (idx < 0) { 317 if (idx < 0) {
@@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event)
293 cpuc->events[idx] = event; 328 cpuc->events[idx] = event;
294 set_bit(idx, cpuc->active_mask); 329 set_bit(idx, cpuc->active_mask);
295 330
296 /* Set the period for the event. */ 331 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
297 armpmu_event_set_period(event, hwc, idx); 332 if (flags & PERF_EF_START)
298 333 armpmu_start(event, PERF_EF_RELOAD);
299 /* Enable the event. */
300 armpmu->enable(hwc, idx);
301 334
302 /* Propagate our changes to the userspace mapping. */ 335 /* Propagate our changes to the userspace mapping. */
303 perf_event_update_userpage(event); 336 perf_event_update_userpage(event);
304 337
305out: 338out:
339 perf_pmu_enable(event->pmu);
306 return err; 340 return err;
307} 341}
308 342
309static struct pmu pmu = { 343static struct pmu pmu;
310 .enable = armpmu_enable,
311 .disable = armpmu_disable,
312 .unthrottle = armpmu_unthrottle,
313 .read = armpmu_read,
314};
315 344
316static int 345static int
317validate_event(struct cpu_hw_events *cpuc, 346validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event)
491 return err; 520 return err;
492} 521}
493 522
494const struct pmu * 523static int armpmu_event_init(struct perf_event *event)
495hw_perf_event_init(struct perf_event *event)
496{ 524{
497 int err = 0; 525 int err = 0;
498 526
527 switch (event->attr.type) {
528 case PERF_TYPE_RAW:
529 case PERF_TYPE_HARDWARE:
530 case PERF_TYPE_HW_CACHE:
531 break;
532
533 default:
534 return -ENOENT;
535 }
536
499 if (!armpmu) 537 if (!armpmu)
500 return ERR_PTR(-ENODEV); 538 return -ENODEV;
501 539
502 event->destroy = hw_perf_event_destroy; 540 event->destroy = hw_perf_event_destroy;
503 541
504 if (!atomic_inc_not_zero(&active_events)) { 542 if (!atomic_inc_not_zero(&active_events)) {
505 if (atomic_read(&active_events) > perf_max_events) { 543 if (atomic_read(&active_events) > armpmu->num_events) {
506 atomic_dec(&active_events); 544 atomic_dec(&active_events);
507 return ERR_PTR(-ENOSPC); 545 return -ENOSPC;
508 } 546 }
509 547
510 mutex_lock(&pmu_reserve_mutex); 548 mutex_lock(&pmu_reserve_mutex);
@@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event)
518 } 556 }
519 557
520 if (err) 558 if (err)
521 return ERR_PTR(err); 559 return err;
522 560
523 err = __hw_perf_event_init(event); 561 err = __hw_perf_event_init(event);
524 if (err) 562 if (err)
525 hw_perf_event_destroy(event); 563 hw_perf_event_destroy(event);
526 564
527 return err ? ERR_PTR(err) : &pmu; 565 return err;
528} 566}
529 567
530void 568static void armpmu_enable(struct pmu *pmu)
531hw_perf_enable(void)
532{ 569{
533 /* Enable all of the perf events on hardware. */ 570 /* Enable all of the perf events on hardware. */
534 int idx; 571 int idx;
@@ -549,13 +586,23 @@ hw_perf_enable(void)
549 armpmu->start(); 586 armpmu->start();
550} 587}
551 588
552void 589static void armpmu_disable(struct pmu *pmu)
553hw_perf_disable(void)
554{ 590{
555 if (armpmu) 591 if (armpmu)
556 armpmu->stop(); 592 armpmu->stop();
557} 593}
558 594
595static struct pmu pmu = {
596 .pmu_enable = armpmu_enable,
597 .pmu_disable = armpmu_disable,
598 .event_init = armpmu_event_init,
599 .add = armpmu_add,
600 .del = armpmu_del,
601 .start = armpmu_start,
602 .stop = armpmu_stop,
603 .read = armpmu_read,
604};
605
559/* 606/*
560 * ARMv6 Performance counter handling code. 607 * ARMv6 Performance counter handling code.
561 * 608 *
@@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num,
1045 * platforms that can have the PMU interrupts raised as an NMI, this 1092 * platforms that can have the PMU interrupts raised as an NMI, this
1046 * will not work. 1093 * will not work.
1047 */ 1094 */
1048 perf_event_do_pending(); 1095 irq_work_run();
1049 1096
1050 return IRQ_HANDLED; 1097 return IRQ_HANDLED;
1051} 1098}
@@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2021 * platforms that can have the PMU interrupts raised as an NMI, this 2068 * platforms that can have the PMU interrupts raised as an NMI, this
2022 * will not work. 2069 * will not work.
2023 */ 2070 */
2024 perf_event_do_pending(); 2071 irq_work_run();
2025 2072
2026 return IRQ_HANDLED; 2073 return IRQ_HANDLED;
2027} 2074}
@@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
2389 armpmu->disable(hwc, idx); 2436 armpmu->disable(hwc, idx);
2390 } 2437 }
2391 2438
2392 perf_event_do_pending(); 2439 irq_work_run();
2393 2440
2394 /* 2441 /*
2395 * Re-enable the PMU. 2442 * Re-enable the PMU.
@@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
2716 armpmu->disable(hwc, idx); 2763 armpmu->disable(hwc, idx);
2717 } 2764 }
2718 2765
2719 perf_event_do_pending(); 2766 irq_work_run();
2720 2767
2721 /* 2768 /*
2722 * Re-enable the PMU. 2769 * Re-enable the PMU.
@@ -2933,14 +2980,12 @@ init_hw_perf_events(void)
2933 armpmu = &armv6pmu; 2980 armpmu = &armv6pmu;
2934 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, 2981 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2935 sizeof(armv6_perf_cache_map)); 2982 sizeof(armv6_perf_cache_map));
2936 perf_max_events = armv6pmu.num_events;
2937 break; 2983 break;
2938 case 0xB020: /* ARM11mpcore */ 2984 case 0xB020: /* ARM11mpcore */
2939 armpmu = &armv6mpcore_pmu; 2985 armpmu = &armv6mpcore_pmu;
2940 memcpy(armpmu_perf_cache_map, 2986 memcpy(armpmu_perf_cache_map,
2941 armv6mpcore_perf_cache_map, 2987 armv6mpcore_perf_cache_map,
2942 sizeof(armv6mpcore_perf_cache_map)); 2988 sizeof(armv6mpcore_perf_cache_map));
2943 perf_max_events = armv6mpcore_pmu.num_events;
2944 break; 2989 break;
2945 case 0xC080: /* Cortex-A8 */ 2990 case 0xC080: /* Cortex-A8 */
2946 armv7pmu.id = ARM_PERF_PMU_ID_CA8; 2991 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2952,7 +2997,6 @@ init_hw_perf_events(void)
2952 /* Reset PMNC and read the nb of CNTx counters 2997 /* Reset PMNC and read the nb of CNTx counters
2953 supported */ 2998 supported */
2954 armv7pmu.num_events = armv7_reset_read_pmnc(); 2999 armv7pmu.num_events = armv7_reset_read_pmnc();
2955 perf_max_events = armv7pmu.num_events;
2956 break; 3000 break;
2957 case 0xC090: /* Cortex-A9 */ 3001 case 0xC090: /* Cortex-A9 */
2958 armv7pmu.id = ARM_PERF_PMU_ID_CA9; 3002 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2964,7 +3008,6 @@ init_hw_perf_events(void)
2964 /* Reset PMNC and read the nb of CNTx counters 3008 /* Reset PMNC and read the nb of CNTx counters
2965 supported */ 3009 supported */
2966 armv7pmu.num_events = armv7_reset_read_pmnc(); 3010 armv7pmu.num_events = armv7_reset_read_pmnc();
2967 perf_max_events = armv7pmu.num_events;
2968 break; 3011 break;
2969 } 3012 }
2970 /* Intel CPUs [xscale]. */ 3013 /* Intel CPUs [xscale]. */
@@ -2975,13 +3018,11 @@ init_hw_perf_events(void)
2975 armpmu = &xscale1pmu; 3018 armpmu = &xscale1pmu;
2976 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 3019 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2977 sizeof(xscale_perf_cache_map)); 3020 sizeof(xscale_perf_cache_map));
2978 perf_max_events = xscale1pmu.num_events;
2979 break; 3021 break;
2980 case 2: 3022 case 2:
2981 armpmu = &xscale2pmu; 3023 armpmu = &xscale2pmu;
2982 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 3024 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2983 sizeof(xscale_perf_cache_map)); 3025 sizeof(xscale_perf_cache_map));
2984 perf_max_events = xscale2pmu.num_events;
2985 break; 3026 break;
2986 } 3027 }
2987 } 3028 }
@@ -2991,9 +3032,10 @@ init_hw_perf_events(void)
2991 arm_pmu_names[armpmu->id], armpmu->num_events); 3032 arm_pmu_names[armpmu->id], armpmu->num_events);
2992 } else { 3033 } else {
2993 pr_info("no hardware support available\n"); 3034 pr_info("no hardware support available\n");
2994 perf_max_events = -1;
2995 } 3035 }
2996 3036
3037 perf_pmu_register(&pmu);
3038
2997 return 0; 3039 return 0;
2998} 3040}
2999arch_initcall(init_hw_perf_events); 3041arch_initcall(init_hw_perf_events);
@@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events);
3001/* 3043/*
3002 * Callchain handling code. 3044 * Callchain handling code.
3003 */ 3045 */
3004static inline void
3005callchain_store(struct perf_callchain_entry *entry,
3006 u64 ip)
3007{
3008 if (entry->nr < PERF_MAX_STACK_DEPTH)
3009 entry->ip[entry->nr++] = ip;
3010}
3011 3046
3012/* 3047/*
3013 * The registers we're interested in are at the end of the variable 3048 * The registers we're interested in are at the end of the variable
@@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail,
3039 if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) 3074 if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3040 return NULL; 3075 return NULL;
3041 3076
3042 callchain_store(entry, buftail.lr); 3077 perf_callchain_store(entry, buftail.lr);
3043 3078
3044 /* 3079 /*
3045 * Frame pointers should strictly progress back up the stack 3080 * Frame pointers should strictly progress back up the stack
@@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail,
3051 return buftail.fp - 1; 3086 return buftail.fp - 1;
3052} 3087}
3053 3088
3054static void 3089void
3055perf_callchain_user(struct pt_regs *regs, 3090perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3056 struct perf_callchain_entry *entry)
3057{ 3091{
3058 struct frame_tail *tail; 3092 struct frame_tail *tail;
3059 3093
3060 callchain_store(entry, PERF_CONTEXT_USER);
3061
3062 if (!user_mode(regs))
3063 regs = task_pt_regs(current);
3064 3094
3065 tail = (struct frame_tail *)regs->ARM_fp - 1; 3095 tail = (struct frame_tail *)regs->ARM_fp - 1;
3066 3096
@@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr,
3078 void *data) 3108 void *data)
3079{ 3109{
3080 struct perf_callchain_entry *entry = data; 3110 struct perf_callchain_entry *entry = data;
3081 callchain_store(entry, fr->pc); 3111 perf_callchain_store(entry, fr->pc);
3082 return 0; 3112 return 0;
3083} 3113}
3084 3114
3085static void 3115void
3086perf_callchain_kernel(struct pt_regs *regs, 3116perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3087 struct perf_callchain_entry *entry)
3088{ 3117{
3089 struct stackframe fr; 3118 struct stackframe fr;
3090 3119
3091 callchain_store(entry, PERF_CONTEXT_KERNEL);
3092 fr.fp = regs->ARM_fp; 3120 fr.fp = regs->ARM_fp;
3093 fr.sp = regs->ARM_sp; 3121 fr.sp = regs->ARM_sp;
3094 fr.lr = regs->ARM_lr; 3122 fr.lr = regs->ARM_lr;
3095 fr.pc = regs->ARM_pc; 3123 fr.pc = regs->ARM_pc;
3096 walk_stackframe(&fr, callchain_trace, entry); 3124 walk_stackframe(&fr, callchain_trace, entry);
3097} 3125}
3098
3099static void
3100perf_do_callchain(struct pt_regs *regs,
3101 struct perf_callchain_entry *entry)
3102{
3103 int is_user;
3104
3105 if (!regs)
3106 return;
3107
3108 is_user = user_mode(regs);
3109
3110 if (!current || !current->pid)
3111 return;
3112
3113 if (is_user && current->state != TASK_RUNNING)
3114 return;
3115
3116 if (!is_user)
3117 perf_callchain_kernel(regs, entry);
3118
3119 if (current->mm)
3120 perf_callchain_user(regs, entry);
3121}
3122
3123static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
3124
3125struct perf_callchain_entry *
3126perf_callchain(struct pt_regs *regs)
3127{
3128 struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
3129
3130 entry->nr = 0;
3131 perf_do_callchain(regs, entry);
3132 return entry;
3133}
diff --git a/arch/arm/mach-at91/include/mach/system.h b/arch/arm/mach-at91/include/mach/system.h
index c80e090b3670..ee8db152592e 100644
--- a/arch/arm/mach-at91/include/mach/system.h
+++ b/arch/arm/mach-at91/include/mach/system.h
@@ -28,17 +28,16 @@
28 28
29static inline void arch_idle(void) 29static inline void arch_idle(void)
30{ 30{
31#ifndef CONFIG_DEBUG_KERNEL
32 /* 31 /*
33 * Disable the processor clock. The processor will be automatically 32 * Disable the processor clock. The processor will be automatically
34 * re-enabled by an interrupt or by a reset. 33 * re-enabled by an interrupt or by a reset.
35 */ 34 */
36 at91_sys_write(AT91_PMC_SCDR, AT91_PMC_PCK); 35 at91_sys_write(AT91_PMC_SCDR, AT91_PMC_PCK);
37#else 36#ifndef CONFIG_CPU_ARM920T
38 /* 37 /*
39 * Set the processor (CP15) into 'Wait for Interrupt' mode. 38 * Set the processor (CP15) into 'Wait for Interrupt' mode.
40 * Unlike disabling the processor clock via the PMC (above) 39 * Post-RM9200 processors need this in conjunction with the above
41 * this allows the processor to be woken via JTAG. 40 * to save power when idle.
42 */ 41 */
43 cpu_do_idle(); 42 cpu_do_idle();
44#endif 43#endif
diff --git a/arch/arm/mach-bcmring/dma.c b/arch/arm/mach-bcmring/dma.c
index 29c0a911df26..77eb35c89cd0 100644
--- a/arch/arm/mach-bcmring/dma.c
+++ b/arch/arm/mach-bcmring/dma.c
@@ -691,7 +691,7 @@ int dma_init(void)
691 691
692 memset(&gDMA, 0, sizeof(gDMA)); 692 memset(&gDMA, 0, sizeof(gDMA));
693 693
694 init_MUTEX_LOCKED(&gDMA.lock); 694 sema_init(&gDMA.lock, 0);
695 init_waitqueue_head(&gDMA.freeChannelQ); 695 init_waitqueue_head(&gDMA.freeChannelQ);
696 696
697 /* Initialize the Hardware */ 697 /* Initialize the Hardware */
@@ -1574,7 +1574,7 @@ int dma_init_mem_map(DMA_MemMap_t *memMap)
1574{ 1574{
1575 memset(memMap, 0, sizeof(*memMap)); 1575 memset(memMap, 0, sizeof(*memMap));
1576 1576
1577 init_MUTEX(&memMap->lock); 1577 sema_init(&memMap->lock, 1);
1578 1578
1579 return 0; 1579 return 0;
1580} 1580}
diff --git a/arch/arm/mach-bcmring/irq.c b/arch/arm/mach-bcmring/irq.c
index dc1c4939b0ce..e3152631eb37 100644
--- a/arch/arm/mach-bcmring/irq.c
+++ b/arch/arm/mach-bcmring/irq.c
@@ -67,21 +67,21 @@ static void bcmring_unmask_irq2(unsigned int irq)
67} 67}
68 68
69static struct irq_chip bcmring_irq0_chip = { 69static struct irq_chip bcmring_irq0_chip = {
70 .typename = "ARM-INTC0", 70 .name = "ARM-INTC0",
71 .ack = bcmring_mask_irq0, 71 .ack = bcmring_mask_irq0,
72 .mask = bcmring_mask_irq0, /* mask a specific interrupt, blocking its delivery. */ 72 .mask = bcmring_mask_irq0, /* mask a specific interrupt, blocking its delivery. */
73 .unmask = bcmring_unmask_irq0, /* unmaks an interrupt */ 73 .unmask = bcmring_unmask_irq0, /* unmaks an interrupt */
74}; 74};
75 75
76static struct irq_chip bcmring_irq1_chip = { 76static struct irq_chip bcmring_irq1_chip = {
77 .typename = "ARM-INTC1", 77 .name = "ARM-INTC1",
78 .ack = bcmring_mask_irq1, 78 .ack = bcmring_mask_irq1,
79 .mask = bcmring_mask_irq1, 79 .mask = bcmring_mask_irq1,
80 .unmask = bcmring_unmask_irq1, 80 .unmask = bcmring_unmask_irq1,
81}; 81};
82 82
83static struct irq_chip bcmring_irq2_chip = { 83static struct irq_chip bcmring_irq2_chip = {
84 .typename = "ARM-SINTC", 84 .name = "ARM-SINTC",
85 .ack = bcmring_mask_irq2, 85 .ack = bcmring_mask_irq2,
86 .mask = bcmring_mask_irq2, 86 .mask = bcmring_mask_irq2,
87 .unmask = bcmring_unmask_irq2, 87 .unmask = bcmring_unmask_irq2,
diff --git a/arch/arm/mach-ep93xx/dma-m2p.c b/arch/arm/mach-ep93xx/dma-m2p.c
index 8904ca4e2e24..a696d354b1f8 100644
--- a/arch/arm/mach-ep93xx/dma-m2p.c
+++ b/arch/arm/mach-ep93xx/dma-m2p.c
@@ -276,7 +276,7 @@ static void channel_disable(struct m2p_channel *ch)
276 v &= ~(M2P_CONTROL_STALL_IRQ_EN | M2P_CONTROL_NFB_IRQ_EN); 276 v &= ~(M2P_CONTROL_STALL_IRQ_EN | M2P_CONTROL_NFB_IRQ_EN);
277 m2p_set_control(ch, v); 277 m2p_set_control(ch, v);
278 278
279 while (m2p_channel_state(ch) == STATE_ON) 279 while (m2p_channel_state(ch) >= STATE_ON)
280 cpu_relax(); 280 cpu_relax();
281 281
282 m2p_set_control(ch, 0x0); 282 m2p_set_control(ch, 0x0);
diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
index c5c0369bb481..2f7e2728970d 100644
--- a/arch/arm/mach-imx/Kconfig
+++ b/arch/arm/mach-imx/Kconfig
@@ -122,6 +122,7 @@ config MACH_CPUIMX27
122 select IMX_HAVE_PLATFORM_IMX_I2C 122 select IMX_HAVE_PLATFORM_IMX_I2C
123 select IMX_HAVE_PLATFORM_IMX_UART 123 select IMX_HAVE_PLATFORM_IMX_UART
124 select IMX_HAVE_PLATFORM_MXC_NAND 124 select IMX_HAVE_PLATFORM_MXC_NAND
125 select MXC_ULPI if USB_ULPI
125 help 126 help
126 Include support for Eukrea CPUIMX27 platform. This includes 127 Include support for Eukrea CPUIMX27 platform. This includes
127 specific configurations for the module and its peripherals. 128 specific configurations for the module and its peripherals.
diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c
index 339150ab0ea5..6830afd1d2ba 100644
--- a/arch/arm/mach-imx/mach-cpuimx27.c
+++ b/arch/arm/mach-imx/mach-cpuimx27.c
@@ -259,7 +259,7 @@ static void __init eukrea_cpuimx27_init(void)
259 i2c_register_board_info(0, eukrea_cpuimx27_i2c_devices, 259 i2c_register_board_info(0, eukrea_cpuimx27_i2c_devices,
260 ARRAY_SIZE(eukrea_cpuimx27_i2c_devices)); 260 ARRAY_SIZE(eukrea_cpuimx27_i2c_devices));
261 261
262 imx27_add_i2c_imx1(&cpuimx27_i2c1_data); 262 imx27_add_i2c_imx0(&cpuimx27_i2c1_data);
263 263
264 platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices)); 264 platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
265 265
diff --git a/arch/arm/mach-iop13xx/msi.c b/arch/arm/mach-iop13xx/msi.c
index f34b0ed80630..7149fcc16c8a 100644
--- a/arch/arm/mach-iop13xx/msi.c
+++ b/arch/arm/mach-iop13xx/msi.c
@@ -164,10 +164,10 @@ static void iop13xx_msi_nop(unsigned int irq)
164static struct irq_chip iop13xx_msi_chip = { 164static struct irq_chip iop13xx_msi_chip = {
165 .name = "PCI-MSI", 165 .name = "PCI-MSI",
166 .ack = iop13xx_msi_nop, 166 .ack = iop13xx_msi_nop,
167 .enable = unmask_msi_irq, 167 .irq_enable = unmask_msi_irq,
168 .disable = mask_msi_irq, 168 .irq_disable = mask_msi_irq,
169 .mask = mask_msi_irq, 169 .irq_mask = mask_msi_irq,
170 .unmask = unmask_msi_irq, 170 .irq_unmask = unmask_msi_irq,
171}; 171};
172 172
173int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) 173int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index bc9a42da2145..0c936cf5675a 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -172,18 +172,12 @@ static void phy3250_spi_cs_set(u32 control)
172} 172}
173 173
174static struct pl022_config_chip spi0_chip_info = { 174static struct pl022_config_chip spi0_chip_info = {
175 .lbm = LOOPBACK_DISABLED,
176 .com_mode = INTERRUPT_TRANSFER, 175 .com_mode = INTERRUPT_TRANSFER,
177 .iface = SSP_INTERFACE_MOTOROLA_SPI, 176 .iface = SSP_INTERFACE_MOTOROLA_SPI,
178 .hierarchy = SSP_MASTER, 177 .hierarchy = SSP_MASTER,
179 .slave_tx_disable = 0, 178 .slave_tx_disable = 0,
180 .endian_tx = SSP_TX_LSB,
181 .endian_rx = SSP_RX_LSB,
182 .data_size = SSP_DATA_BITS_8,
183 .rx_lev_trig = SSP_RX_4_OR_MORE_ELEM, 179 .rx_lev_trig = SSP_RX_4_OR_MORE_ELEM,
184 .tx_lev_trig = SSP_TX_4_OR_MORE_EMPTY_LOC, 180 .tx_lev_trig = SSP_TX_4_OR_MORE_EMPTY_LOC,
185 .clk_phase = SSP_CLK_FIRST_EDGE,
186 .clk_pol = SSP_CLK_POL_IDLE_LOW,
187 .ctrl_len = SSP_BITS_8, 181 .ctrl_len = SSP_BITS_8,
188 .wait_state = SSP_MWIRE_WAIT_ZERO, 182 .wait_state = SSP_MWIRE_WAIT_ZERO,
189 .duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, 183 .duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX,
@@ -239,6 +233,7 @@ static int __init phy3250_spi_board_register(void)
239 .max_speed_hz = 5000000, 233 .max_speed_hz = 5000000,
240 .bus_num = 0, 234 .bus_num = 0,
241 .chip_select = 0, 235 .chip_select = 0,
236 .mode = SPI_MODE_0,
242 .platform_data = &eeprom, 237 .platform_data = &eeprom,
243 .controller_data = &spi0_chip_info, 238 .controller_data = &spi0_chip_info,
244 }, 239 },
diff --git a/arch/arm/mach-s5p6440/cpu.c b/arch/arm/mach-s5p6440/cpu.c
index 526f33adb31d..ec592e866054 100644
--- a/arch/arm/mach-s5p6440/cpu.c
+++ b/arch/arm/mach-s5p6440/cpu.c
@@ -19,6 +19,7 @@
19#include <linux/sysdev.h> 19#include <linux/sysdev.h>
20#include <linux/serial_core.h> 20#include <linux/serial_core.h>
21#include <linux/platform_device.h> 21#include <linux/platform_device.h>
22#include <linux/sched.h>
22 23
23#include <asm/mach/arch.h> 24#include <asm/mach/arch.h>
24#include <asm/mach/map.h> 25#include <asm/mach/map.h>
diff --git a/arch/arm/mach-s5p6442/cpu.c b/arch/arm/mach-s5p6442/cpu.c
index a48fb553fd01..70ac681af72b 100644
--- a/arch/arm/mach-s5p6442/cpu.c
+++ b/arch/arm/mach-s5p6442/cpu.c
@@ -19,6 +19,7 @@
19#include <linux/sysdev.h> 19#include <linux/sysdev.h>
20#include <linux/serial_core.h> 20#include <linux/serial_core.h>
21#include <linux/platform_device.h> 21#include <linux/platform_device.h>
22#include <linux/sched.h>
22 23
23#include <asm/mach/arch.h> 24#include <asm/mach/arch.h>
24#include <asm/mach/map.h> 25#include <asm/mach/map.h>
diff --git a/arch/arm/mach-s5pc100/cpu.c b/arch/arm/mach-s5pc100/cpu.c
index 251c92ac5b22..cd1afbce83e2 100644
--- a/arch/arm/mach-s5pc100/cpu.c
+++ b/arch/arm/mach-s5pc100/cpu.c
@@ -21,6 +21,7 @@
21#include <linux/sysdev.h> 21#include <linux/sysdev.h>
22#include <linux/serial_core.h> 22#include <linux/serial_core.h>
23#include <linux/platform_device.h> 23#include <linux/platform_device.h>
24#include <linux/sched.h>
24 25
25#include <asm/mach/arch.h> 26#include <asm/mach/arch.h>
26#include <asm/mach/map.h> 27#include <asm/mach/map.h>
diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c
index cfecd70657cb..d562670e1b0b 100644
--- a/arch/arm/mach-s5pv210/clock.c
+++ b/arch/arm/mach-s5pv210/clock.c
@@ -173,11 +173,6 @@ static int s5pv210_clk_ip3_ctrl(struct clk *clk, int enable)
173 return s5p_gatectrl(S5P_CLKGATE_IP3, clk, enable); 173 return s5p_gatectrl(S5P_CLKGATE_IP3, clk, enable);
174} 174}
175 175
176static int s5pv210_clk_ip4_ctrl(struct clk *clk, int enable)
177{
178 return s5p_gatectrl(S5P_CLKGATE_IP4, clk, enable);
179}
180
181static int s5pv210_clk_mask0_ctrl(struct clk *clk, int enable) 176static int s5pv210_clk_mask0_ctrl(struct clk *clk, int enable)
182{ 177{
183 return s5p_gatectrl(S5P_CLK_SRC_MASK0, clk, enable); 178 return s5p_gatectrl(S5P_CLK_SRC_MASK0, clk, enable);
diff --git a/arch/arm/mach-s5pv210/cpu.c b/arch/arm/mach-s5pv210/cpu.c
index 77f456c91ad3..245b82b53df4 100644
--- a/arch/arm/mach-s5pv210/cpu.c
+++ b/arch/arm/mach-s5pv210/cpu.c
@@ -19,6 +19,7 @@
19#include <linux/io.h> 19#include <linux/io.h>
20#include <linux/sysdev.h> 20#include <linux/sysdev.h>
21#include <linux/platform_device.h> 21#include <linux/platform_device.h>
22#include <linux/sched.h>
22 23
23#include <asm/mach/arch.h> 24#include <asm/mach/arch.h>
24#include <asm/mach/map.h> 25#include <asm/mach/map.h>
diff --git a/arch/arm/mach-u300/dummyspichip.c b/arch/arm/mach-u300/dummyspichip.c
index 5f55012b7c9e..03f793612594 100644
--- a/arch/arm/mach-u300/dummyspichip.c
+++ b/arch/arm/mach-u300/dummyspichip.c
@@ -46,7 +46,6 @@ static ssize_t dummy_looptest(struct device *dev,
46 * struct, this is just used here to alter the behaviour of the chip 46 * struct, this is just used here to alter the behaviour of the chip
47 * in order to perform tests. 47 * in order to perform tests.
48 */ 48 */
49 struct pl022_config_chip *chip_info = spi->controller_data;
50 int status; 49 int status;
51 u8 txbuf[14] = {0xDE, 0xAD, 0xBE, 0xEF, 0x2B, 0xAD, 50 u8 txbuf[14] = {0xDE, 0xAD, 0xBE, 0xEF, 0x2B, 0xAD,
52 0xCA, 0xFE, 0xBA, 0xBE, 0xB1, 0x05, 51 0xCA, 0xFE, 0xBA, 0xBE, 0xB1, 0x05,
@@ -72,7 +71,7 @@ static ssize_t dummy_looptest(struct device *dev,
72 * Force chip to 8 bit mode 71 * Force chip to 8 bit mode
73 * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC! 72 * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC!
74 */ 73 */
75 chip_info->data_size = SSP_DATA_BITS_8; 74 spi->bits_per_word = 8;
76 /* You should NOT DO THIS EITHER */ 75 /* You should NOT DO THIS EITHER */
77 spi->master->setup(spi); 76 spi->master->setup(spi);
78 77
@@ -159,7 +158,7 @@ static ssize_t dummy_looptest(struct device *dev,
159 * Force chip to 16 bit mode 158 * Force chip to 16 bit mode
160 * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC! 159 * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC!
161 */ 160 */
162 chip_info->data_size = SSP_DATA_BITS_16; 161 spi->bits_per_word = 16;
163 /* You should NOT DO THIS EITHER */ 162 /* You should NOT DO THIS EITHER */
164 spi->master->setup(spi); 163 spi->master->setup(spi);
165 164
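
With data_size gone from pl022_config_chip, word length is negotiated through the SPI core. A hedged sketch of the usual pattern in a protocol driver (the test code above deliberately bypasses it by calling master->setup() directly):

    #include <linux/spi/spi.h>

    /* Illustrative: word length is requested via the SPI core, not chip data. */
    static int example_use_16bit_words(struct spi_device *spi)
    {
        spi->bits_per_word = 16;
        return spi_setup(spi);   /* normally done once, e.g. at probe time */
    }
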
diff --git a/arch/arm/mach-u300/spi.c b/arch/arm/mach-u300/spi.c
index f0e887bea30e..edb2c0d255c2 100644
--- a/arch/arm/mach-u300/spi.c
+++ b/arch/arm/mach-u300/spi.c
@@ -30,8 +30,6 @@ static void select_dummy_chip(u32 chipselect)
30} 30}
31 31
32struct pl022_config_chip dummy_chip_info = { 32struct pl022_config_chip dummy_chip_info = {
33 /* Nominally this is LOOPBACK_DISABLED, but this is our dummy chip! */
34 .lbm = LOOPBACK_ENABLED,
35 /* 33 /*
36 * available POLLING_TRANSFER and INTERRUPT_TRANSFER, 34 * available POLLING_TRANSFER and INTERRUPT_TRANSFER,
37 * DMA_TRANSFER does not work 35 * DMA_TRANSFER does not work
@@ -42,14 +40,8 @@ struct pl022_config_chip dummy_chip_info = {
42 .hierarchy = SSP_MASTER, 40 .hierarchy = SSP_MASTER,
43 /* 0 = drive TX even as slave, 1 = do not drive TX as slave */ 41 /* 0 = drive TX even as slave, 1 = do not drive TX as slave */
44 .slave_tx_disable = 0, 42 .slave_tx_disable = 0,
45 /* LSB first */
46 .endian_tx = SSP_TX_LSB,
47 .endian_rx = SSP_RX_LSB,
48 .data_size = SSP_DATA_BITS_8, /* used to be 12 in some default */
49 .rx_lev_trig = SSP_RX_1_OR_MORE_ELEM, 43 .rx_lev_trig = SSP_RX_1_OR_MORE_ELEM,
50 .tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC, 44 .tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC,
51 .clk_phase = SSP_CLK_SECOND_EDGE,
52 .clk_pol = SSP_CLK_POL_IDLE_LOW,
53 .ctrl_len = SSP_BITS_12, 45 .ctrl_len = SSP_BITS_12,
54 .wait_state = SSP_MWIRE_WAIT_ZERO, 46 .wait_state = SSP_MWIRE_WAIT_ZERO,
55 .duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, 47 .duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX,
@@ -75,7 +67,7 @@ static struct spi_board_info u300_spi_devices[] = {
75 .bus_num = 0, /* Only one bus on this chip */ 67 .bus_num = 0, /* Only one bus on this chip */
76 .chip_select = 0, 68 .chip_select = 0,
 77 /* Means SPI_CS_HIGH, change if e.g. low CS */ 69 /* Means SPI_CS_HIGH, change if e.g. low CS */
78 .mode = 0, 70 .mode = SPI_MODE_1 | SPI_LSB_FIRST | SPI_LOOP,
79 }, 71 },
80#endif 72#endif
81}; 73};
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 0e8fd135a57d..219ae0ca4eef 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -55,19 +55,13 @@ static void ab4500_spi_cs_control(u32 command)
55} 55}
56 56
57struct pl022_config_chip ab4500_chip_info = { 57struct pl022_config_chip ab4500_chip_info = {
58 .lbm = LOOPBACK_DISABLED,
59 .com_mode = INTERRUPT_TRANSFER, 58 .com_mode = INTERRUPT_TRANSFER,
60 .iface = SSP_INTERFACE_MOTOROLA_SPI, 59 .iface = SSP_INTERFACE_MOTOROLA_SPI,
61 /* we can act as master only */ 60 /* we can act as master only */
62 .hierarchy = SSP_MASTER, 61 .hierarchy = SSP_MASTER,
63 .slave_tx_disable = 0, 62 .slave_tx_disable = 0,
64 .endian_rx = SSP_RX_MSB,
65 .endian_tx = SSP_TX_MSB,
66 .data_size = SSP_DATA_BITS_24,
67 .rx_lev_trig = SSP_RX_1_OR_MORE_ELEM, 63 .rx_lev_trig = SSP_RX_1_OR_MORE_ELEM,
68 .tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC, 64 .tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC,
69 .clk_phase = SSP_CLK_SECOND_EDGE,
70 .clk_pol = SSP_CLK_POL_IDLE_HIGH,
71 .cs_control = ab4500_spi_cs_control, 65 .cs_control = ab4500_spi_cs_control,
72}; 66};
73 67
@@ -83,7 +77,7 @@ static struct spi_board_info u8500_spi_devices[] = {
83 .max_speed_hz = 12000000, 77 .max_speed_hz = 12000000,
84 .bus_num = 0, 78 .bus_num = 0,
85 .chip_select = 0, 79 .chip_select = 0,
86 .mode = SPI_MODE_0, 80 .mode = SPI_MODE_3,
87 .irq = IRQ_DB8500_AB8500, 81 .irq = IRQ_DB8500_AB8500,
88 }, 82 },
89}; 83};
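
The .mode changes in these board files fold the removed clk_pol/clk_phase pair into the standard SPI mode flags. A small illustrative helper showing that mapping; the function name is made up for the example:

    #include <linux/types.h>
    #include <linux/spi/spi.h>

    /*
     * Illustrative helper: the removed clk_pol/clk_phase pair maps directly
     * onto the SPI core's CPOL/CPHA flags (SPI_MODE_3 == SPI_CPOL | SPI_CPHA).
     */
    static u8 example_mode_from_clk(bool idle_high, bool second_edge)
    {
        u8 mode = 0;

        if (idle_high)
            mode |= SPI_CPOL;    /* clock idles high    -> CPOL = 1 */
        if (second_edge)
            mode |= SPI_CPHA;    /* sample on 2nd edge  -> CPHA = 1 */
        return mode;
    }
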
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index efb127022d42..71fb17349520 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -68,7 +68,7 @@ static void __init ct_ca9x4_init_irq(void)
68} 68}
69 69
70#if 0 70#if 0
71static void ct_ca9x4_timer_init(void) 71static void __init ct_ca9x4_timer_init(void)
72{ 72{
73 writel(0, MMIO_P2V(CT_CA9X4_TIMER0) + TIMER_CTRL); 73 writel(0, MMIO_P2V(CT_CA9X4_TIMER0) + TIMER_CTRL);
74 writel(0, MMIO_P2V(CT_CA9X4_TIMER1) + TIMER_CTRL); 74 writel(0, MMIO_P2V(CT_CA9X4_TIMER1) + TIMER_CTRL);
@@ -222,7 +222,7 @@ static struct platform_device pmu_device = {
222 .resource = pmu_resources, 222 .resource = pmu_resources,
223}; 223};
224 224
225static void ct_ca9x4_init(void) 225static void __init ct_ca9x4_init(void)
226{ 226{
227 int i; 227 int i;
228 228
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 817f0ad38a0b..7eaa232180a5 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -48,7 +48,7 @@ void __init v2m_map_io(struct map_desc *tile, size_t num)
48} 48}
49 49
50 50
51static void v2m_timer_init(void) 51static void __init v2m_timer_init(void)
52{ 52{
53 writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL); 53 writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL);
54 writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL); 54 writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL);
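
Both timer-init functions above are only reachable through a sys_timer at boot, so the __init annotation lets their text be discarded afterwards. A minimal sketch of the pattern, with placeholder names:

    #include <linux/init.h>
    #include <asm/mach/time.h>

    /* Boot-only setup can live in .init.text and be freed after boot. */
    static void __init example_timer_init(void)
    {
        /* program the timer hardware here */
    }

    static struct sys_timer example_timer = {
        .init = example_timer_init,
    };
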
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index ab506272b2d3..17e7b0b57e49 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -204,8 +204,12 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
204 /* 204 /*
205 * Don't allow RAM to be mapped - this causes problems with ARMv6+ 205 * Don't allow RAM to be mapped - this causes problems with ARMv6+
206 */ 206 */
207 if (WARN_ON(pfn_valid(pfn))) 207 if (pfn_valid(pfn)) {
208 return NULL; 208 printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory. This leads\n"
209 KERN_WARNING "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n"
210 KERN_WARNING "will fail in the next kernel release. Please fix your driver.\n");
211 WARN_ON(1);
212 }
209 213
210 type = get_mem_type(mtype); 214 type = get_mem_type(mtype);
211 if (!type) 215 if (!type)
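
The hunk above downgrades the hard failure to a loud warning for one release; drivers mapping system RAM are still expected to stop doing so. An illustrative driver-side guard, assuming a hypothetical example_map_regs() helper:

    #include <linux/io.h>
    #include <linux/mm.h>
    #include <asm/memory.h>

    /* Illustrative driver-side guard: never ioremap() system RAM. */
    static void __iomem *example_map_regs(unsigned long phys, size_t size)
    {
        if (pfn_valid(__phys_to_pfn(phys)))
            return NULL;    /* RAM already has (cacheable) kernel mappings */

        return ioremap(phys, size);
    }
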
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6a3a2d0cd6db..e8ed9dc461fe 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -248,7 +248,7 @@ static struct mem_type mem_types[] = {
248 }, 248 },
249 [MT_MEMORY] = { 249 [MT_MEMORY] = {
250 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | 250 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
251 L_PTE_USER | L_PTE_EXEC, 251 L_PTE_WRITE | L_PTE_EXEC,
252 .prot_l1 = PMD_TYPE_TABLE, 252 .prot_l1 = PMD_TYPE_TABLE,
253 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, 253 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
254 .domain = DOMAIN_KERNEL, 254 .domain = DOMAIN_KERNEL,
@@ -259,7 +259,7 @@ static struct mem_type mem_types[] = {
259 }, 259 },
260 [MT_MEMORY_NONCACHED] = { 260 [MT_MEMORY_NONCACHED] = {
261 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | 261 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
262 L_PTE_USER | L_PTE_EXEC | L_PTE_MT_BUFFERABLE, 262 L_PTE_WRITE | L_PTE_EXEC | L_PTE_MT_BUFFERABLE,
263 .prot_l1 = PMD_TYPE_TABLE, 263 .prot_l1 = PMD_TYPE_TABLE,
264 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, 264 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
265 .domain = DOMAIN_KERNEL, 265 .domain = DOMAIN_KERNEL,
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 7563ff0141bd..197f21bed5e9 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -253,6 +253,14 @@ __v7_setup:
253 orreq r10, r10, #1 << 22 @ set bit #22 253 orreq r10, r10, #1 << 22 @ set bit #22
254 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register 254 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
255#endif 255#endif
256#ifdef CONFIG_ARM_ERRATA_743622
257 teq r6, #0x20 @ present in r2p0
258 teqne r6, #0x21 @ present in r2p1
259 teqne r6, #0x22 @ present in r2p2
260 mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
261 orreq r10, r10, #1 << 6 @ set bit #6
262 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
263#endif
256 264
2573: mov r10, #0 2653: mov r10, #0
258#ifdef HARVARD_CACHE 266#ifdef HARVARD_CACHE
@@ -365,7 +373,7 @@ __v7_ca9mp_proc_info:
365 b __v7_ca9mp_setup 373 b __v7_ca9mp_setup
366 .long cpu_arch_name 374 .long cpu_arch_name
367 .long cpu_elf_name 375 .long cpu_elf_name
368 .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP 376 .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
369 .long cpu_v7_name 377 .long cpu_v7_name
370 .long v7_processor_functions 378 .long v7_processor_functions
371 .long v7wbi_tlb_fns 379 .long v7wbi_tlb_fns
diff --git a/arch/arm/oprofile/Makefile b/arch/arm/oprofile/Makefile
index e666eafed152..b2215c61cdf0 100644
--- a/arch/arm/oprofile/Makefile
+++ b/arch/arm/oprofile/Makefile
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
6 oprofilefs.o oprofile_stats.o \ 6 oprofilefs.o oprofile_stats.o \
7 timer_int.o ) 7 timer_int.o )
8 8
9ifeq ($(CONFIG_HW_PERF_EVENTS),y)
10DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
11endif
12
9oprofile-y := $(DRIVER_OBJS) common.o 13oprofile-y := $(DRIVER_OBJS) common.o
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 72e09eb642dd..8aa974491dfc 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -25,139 +25,10 @@
25#include <asm/ptrace.h> 25#include <asm/ptrace.h>
26 26
27#ifdef CONFIG_HW_PERF_EVENTS 27#ifdef CONFIG_HW_PERF_EVENTS
28/* 28char *op_name_from_perf_id(void)
29 * Per performance monitor configuration as set via oprofilefs.
30 */
31struct op_counter_config {
32 unsigned long count;
33 unsigned long enabled;
34 unsigned long event;
35 unsigned long unit_mask;
36 unsigned long kernel;
37 unsigned long user;
38 struct perf_event_attr attr;
39};
40
41static int op_arm_enabled;
42static DEFINE_MUTEX(op_arm_mutex);
43
44static struct op_counter_config *counter_config;
45static struct perf_event **perf_events[nr_cpumask_bits];
46static int perf_num_counters;
47
48/*
49 * Overflow callback for oprofile.
50 */
51static void op_overflow_handler(struct perf_event *event, int unused,
52 struct perf_sample_data *data, struct pt_regs *regs)
53{ 29{
54 int id; 30 enum arm_perf_pmu_ids id = armpmu_get_pmu_id();
55 u32 cpu = smp_processor_id();
56
57 for (id = 0; id < perf_num_counters; ++id)
58 if (perf_events[cpu][id] == event)
59 break;
60
61 if (id != perf_num_counters)
62 oprofile_add_sample(regs, id);
63 else
64 pr_warning("oprofile: ignoring spurious overflow "
65 "on cpu %u\n", cpu);
66}
67
68/*
69 * Called by op_arm_setup to create perf attributes to mirror the oprofile
70 * settings in counter_config. Attributes are created as `pinned' events and
71 * so are permanently scheduled on the PMU.
72 */
73static void op_perf_setup(void)
74{
75 int i;
76 u32 size = sizeof(struct perf_event_attr);
77 struct perf_event_attr *attr;
78
79 for (i = 0; i < perf_num_counters; ++i) {
80 attr = &counter_config[i].attr;
81 memset(attr, 0, size);
82 attr->type = PERF_TYPE_RAW;
83 attr->size = size;
84 attr->config = counter_config[i].event;
85 attr->sample_period = counter_config[i].count;
86 attr->pinned = 1;
87 }
88}
89
90static int op_create_counter(int cpu, int event)
91{
92 int ret = 0;
93 struct perf_event *pevent;
94
95 if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
96 return ret;
97
98 pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
99 cpu, -1,
100 op_overflow_handler);
101
102 if (IS_ERR(pevent)) {
103 ret = PTR_ERR(pevent);
104 } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
105 perf_event_release_kernel(pevent);
106 pr_warning("oprofile: failed to enable event %d "
107 "on CPU %d\n", event, cpu);
108 ret = -EBUSY;
109 } else {
110 perf_events[cpu][event] = pevent;
111 }
112
113 return ret;
114}
115 31
116static void op_destroy_counter(int cpu, int event)
117{
118 struct perf_event *pevent = perf_events[cpu][event];
119
120 if (pevent) {
121 perf_event_release_kernel(pevent);
122 perf_events[cpu][event] = NULL;
123 }
124}
125
126/*
127 * Called by op_arm_start to create active perf events based on the
128 * perviously configured attributes.
129 */
130static int op_perf_start(void)
131{
132 int cpu, event, ret = 0;
133
134 for_each_online_cpu(cpu) {
135 for (event = 0; event < perf_num_counters; ++event) {
136 ret = op_create_counter(cpu, event);
137 if (ret)
138 goto out;
139 }
140 }
141
142out:
143 return ret;
144}
145
146/*
147 * Called by op_arm_stop at the end of a profiling run.
148 */
149static void op_perf_stop(void)
150{
151 int cpu, event;
152
153 for_each_online_cpu(cpu)
154 for (event = 0; event < perf_num_counters; ++event)
155 op_destroy_counter(cpu, event);
156}
157
158
159static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
160{
161 switch (id) { 32 switch (id) {
162 case ARM_PERF_PMU_ID_XSCALE1: 33 case ARM_PERF_PMU_ID_XSCALE1:
163 return "arm/xscale1"; 34 return "arm/xscale1";
@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
176 } 47 }
177} 48}
178 49
179static int op_arm_create_files(struct super_block *sb, struct dentry *root)
180{
181 unsigned int i;
182
183 for (i = 0; i < perf_num_counters; i++) {
184 struct dentry *dir;
185 char buf[4];
186
187 snprintf(buf, sizeof buf, "%d", i);
188 dir = oprofilefs_mkdir(sb, root, buf);
189 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
190 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
191 oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
192 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
193 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
194 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
195 }
196
197 return 0;
198}
199
200static int op_arm_setup(void)
201{
202 spin_lock(&oprofilefs_lock);
203 op_perf_setup();
204 spin_unlock(&oprofilefs_lock);
205 return 0;
206}
207
208static int op_arm_start(void)
209{
210 int ret = -EBUSY;
211
212 mutex_lock(&op_arm_mutex);
213 if (!op_arm_enabled) {
214 ret = 0;
215 op_perf_start();
216 op_arm_enabled = 1;
217 }
218 mutex_unlock(&op_arm_mutex);
219 return ret;
220}
221
222static void op_arm_stop(void)
223{
224 mutex_lock(&op_arm_mutex);
225 if (op_arm_enabled)
226 op_perf_stop();
227 op_arm_enabled = 0;
228 mutex_unlock(&op_arm_mutex);
229}
230
231#ifdef CONFIG_PM
232static int op_arm_suspend(struct platform_device *dev, pm_message_t state)
233{
234 mutex_lock(&op_arm_mutex);
235 if (op_arm_enabled)
236 op_perf_stop();
237 mutex_unlock(&op_arm_mutex);
238 return 0;
239}
240
241static int op_arm_resume(struct platform_device *dev)
242{
243 mutex_lock(&op_arm_mutex);
244 if (op_arm_enabled && op_perf_start())
245 op_arm_enabled = 0;
246 mutex_unlock(&op_arm_mutex);
247 return 0;
248}
249
250static struct platform_driver oprofile_driver = {
251 .driver = {
252 .name = "arm-oprofile",
253 },
254 .resume = op_arm_resume,
255 .suspend = op_arm_suspend,
256};
257
258static struct platform_device *oprofile_pdev;
259
260static int __init init_driverfs(void)
261{
262 int ret;
263
264 ret = platform_driver_register(&oprofile_driver);
265 if (ret)
266 goto out;
267
268 oprofile_pdev = platform_device_register_simple(
269 oprofile_driver.driver.name, 0, NULL, 0);
270 if (IS_ERR(oprofile_pdev)) {
271 ret = PTR_ERR(oprofile_pdev);
272 platform_driver_unregister(&oprofile_driver);
273 }
274
275out:
276 return ret;
277}
278
279static void exit_driverfs(void)
280{
281 platform_device_unregister(oprofile_pdev);
282 platform_driver_unregister(&oprofile_driver);
283}
284#else
285static int __init init_driverfs(void) { return 0; }
286#define exit_driverfs() do { } while (0)
287#endif /* CONFIG_PM */
288
289static int report_trace(struct stackframe *frame, void *d) 50static int report_trace(struct stackframe *frame, void *d)
290{ 51{
291 unsigned int *depth = d; 52 unsigned int *depth = d;
@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
350 111
351int __init oprofile_arch_init(struct oprofile_operations *ops) 112int __init oprofile_arch_init(struct oprofile_operations *ops)
352{ 113{
353 int cpu, ret = 0;
354
355 perf_num_counters = armpmu_get_max_events();
356
357 counter_config = kcalloc(perf_num_counters,
358 sizeof(struct op_counter_config), GFP_KERNEL);
359
360 if (!counter_config) {
361 pr_info("oprofile: failed to allocate %d "
362 "counters\n", perf_num_counters);
363 return -ENOMEM;
364 }
365
366 ret = init_driverfs();
367 if (ret) {
368 kfree(counter_config);
369 counter_config = NULL;
370 return ret;
371 }
372
373 for_each_possible_cpu(cpu) {
374 perf_events[cpu] = kcalloc(perf_num_counters,
375 sizeof(struct perf_event *), GFP_KERNEL);
376 if (!perf_events[cpu]) {
377 pr_info("oprofile: failed to allocate %d perf events "
378 "for cpu %d\n", perf_num_counters, cpu);
379 while (--cpu >= 0)
380 kfree(perf_events[cpu]);
381 return -ENOMEM;
382 }
383 }
384
385 ops->backtrace = arm_backtrace; 114 ops->backtrace = arm_backtrace;
386 ops->create_files = op_arm_create_files;
387 ops->setup = op_arm_setup;
388 ops->start = op_arm_start;
389 ops->stop = op_arm_stop;
390 ops->shutdown = op_arm_stop;
391 ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id());
392
393 if (!ops->cpu_type)
394 ret = -ENODEV;
395 else
396 pr_info("oprofile: using %s\n", ops->cpu_type);
397 115
398 return ret; 116 return oprofile_perf_init(ops);
399} 117}
400 118
401void oprofile_arch_exit(void) 119void __exit oprofile_arch_exit(void)
402{ 120{
403 int cpu, id; 121 oprofile_perf_exit();
404 struct perf_event *event;
405
406 if (*perf_events) {
407 for_each_possible_cpu(cpu) {
408 for (id = 0; id < perf_num_counters; ++id) {
409 event = perf_events[cpu][id];
410 if (event != NULL)
411 perf_event_release_kernel(event);
412 }
413 kfree(perf_events[cpu]);
414 }
415 }
416
417 if (counter_config) {
418 kfree(counter_config);
419 exit_driverfs();
420 }
421} 122}
422#else 123#else
423int __init oprofile_arch_init(struct oprofile_operations *ops) 124int __init oprofile_arch_init(struct oprofile_operations *ops)
@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
425 pr_info("oprofile: hardware counters not available\n"); 126 pr_info("oprofile: hardware counters not available\n");
426 return -ENODEV; 127 return -ENODEV;
427} 128}
428void oprofile_arch_exit(void) {} 129void __exit oprofile_arch_exit(void) {}
429#endif /* CONFIG_HW_PERF_EVENTS */ 130#endif /* CONFIG_HW_PERF_EVENTS */
diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c
index a202a2ce6e3d..6cd151b31bc5 100644
--- a/arch/arm/plat-omap/iommu.c
+++ b/arch/arm/plat-omap/iommu.c
@@ -320,6 +320,7 @@ void flush_iotlb_page(struct iommu *obj, u32 da)
320 if ((start <= da) && (da < start + bytes)) { 320 if ((start <= da) && (da < start + bytes)) {
321 dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n", 321 dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n",
322 __func__, start, da, bytes); 322 __func__, start, da, bytes);
323 iotlb_load_cr(obj, &cr);
323 iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); 324 iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
324 } 325 }
325 } 326 }
diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c
index 04d9521ddc9f..e8f2be2d67f2 100644
--- a/arch/arm/plat-samsung/adc.c
+++ b/arch/arm/plat-samsung/adc.c
@@ -435,7 +435,6 @@ static int s3c_adc_suspend(struct platform_device *pdev, pm_message_t state)
435static int s3c_adc_resume(struct platform_device *pdev) 435static int s3c_adc_resume(struct platform_device *pdev)
436{ 436{
437 struct adc_device *adc = platform_get_drvdata(pdev); 437 struct adc_device *adc = platform_get_drvdata(pdev);
438 unsigned long flags;
439 438
440 clk_enable(adc->clk); 439 clk_enable(adc->clk);
441 enable_irq(adc->irq); 440 enable_irq(adc->irq);
diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c
index 90a20512d68d..e8d20b0bc50e 100644
--- a/arch/arm/plat-samsung/clock.c
+++ b/arch/arm/plat-samsung/clock.c
@@ -48,6 +48,9 @@
48#include <plat/clock.h> 48#include <plat/clock.h>
49#include <plat/cpu.h> 49#include <plat/cpu.h>
50 50
51#include <linux/serial_core.h>
52#include <plat/regs-serial.h> /* for s3c24xx_uart_devs */
53
51/* clock information */ 54/* clock information */
52 55
53static LIST_HEAD(clocks); 56static LIST_HEAD(clocks);
@@ -65,6 +68,28 @@ static int clk_null_enable(struct clk *clk, int enable)
65 return 0; 68 return 0;
66} 69}
67 70
71static int dev_is_s3c_uart(struct device *dev)
72{
73 struct platform_device **pdev = s3c24xx_uart_devs;
74 int i;
75 for (i = 0; i < ARRAY_SIZE(s3c24xx_uart_devs); i++, pdev++)
76 if (*pdev && dev == &(*pdev)->dev)
77 return 1;
78 return 0;
79}
80
 81/*
 82 * Serial drivers call clk_get() very early, before the platform bus
 83 * has been set up; this requires a special check to let them get
 84 * a proper clock.
 85 */
86
87static int dev_is_platform_device(struct device *dev)
88{
89 return dev->bus == &platform_bus_type ||
90 (dev->bus == NULL && dev_is_s3c_uart(dev));
91}
92
68/* Clock API calls */ 93/* Clock API calls */
69 94
70struct clk *clk_get(struct device *dev, const char *id) 95struct clk *clk_get(struct device *dev, const char *id)
@@ -73,7 +98,7 @@ struct clk *clk_get(struct device *dev, const char *id)
73 struct clk *clk = ERR_PTR(-ENOENT); 98 struct clk *clk = ERR_PTR(-ENOENT);
74 int idno; 99 int idno;
75 100
76 if (dev == NULL || dev->bus != &platform_bus_type) 101 if (dev == NULL || !dev_is_platform_device(dev))
77 idno = -1; 102 idno = -1;
78 else 103 else
79 idno = to_platform_device(dev)->id; 104 idno = to_platform_device(dev)->id;
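
One consequence of the dev_is_platform_device() change above: clk_get() now also works for the s3c24xx UART devices before they are registered on the platform bus. A hedged usage sketch; the clock id "uart" is an assumption:

    #include <linux/err.h>
    #include <linux/clk.h>

    /* Illustrative early-console path: clk_get() succeeds even while
     * dev->bus is still NULL for the s3c24xx UART platform devices. */
    static int example_enable_uart_clock(struct device *uart_dev)
    {
        struct clk *clk = clk_get(uart_dev, "uart");  /* clock id is an assumption */

        if (IS_ERR(clk))
            return PTR_ERR(clk);
        return clk_enable(clk);
    }
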
diff --git a/arch/arm/plat-samsung/include/plat/s3c64xx-spi.h b/arch/arm/plat-samsung/include/plat/s3c64xx-spi.h
index e5aba8f95b79..b226f7405e6b 100644
--- a/arch/arm/plat-samsung/include/plat/s3c64xx-spi.h
+++ b/arch/arm/plat-samsung/include/plat/s3c64xx-spi.h
@@ -32,6 +32,8 @@ struct s3c64xx_spi_csinfo {
32 * struct s3c64xx_spi_info - SPI Controller defining structure 32 * struct s3c64xx_spi_info - SPI Controller defining structure
33 * @src_clk_nr: Clock source index for the CLK_CFG[SPI_CLKSEL] field. 33 * @src_clk_nr: Clock source index for the CLK_CFG[SPI_CLKSEL] field.
34 * @src_clk_name: Platform name of the corresponding clock. 34 * @src_clk_name: Platform name of the corresponding clock.
 35 * @clk_from_cmu: Set if the SPI clock/prescaler control block is provided
 36 * by the platform's clock-management-unit rather than by the SPI controller.
35 * @num_cs: Number of CS this controller emulates. 37 * @num_cs: Number of CS this controller emulates.
36 * @cfg_gpio: Configure pins for this SPI controller. 38 * @cfg_gpio: Configure pins for this SPI controller.
37 * @fifo_lvl_mask: All tx fifo_lvl fields start at offset-6 39 * @fifo_lvl_mask: All tx fifo_lvl fields start at offset-6
@@ -41,6 +43,7 @@ struct s3c64xx_spi_csinfo {
41struct s3c64xx_spi_info { 43struct s3c64xx_spi_info {
42 int src_clk_nr; 44 int src_clk_nr;
43 char *src_clk_name; 45 char *src_clk_name;
46 bool clk_from_cmu;
44 47
45 int num_cs; 48 int num_cs;
46 49
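
A hedged example of how a machine might fill in the extended platform data when the SPI prescaler sits in the clock-management unit; the clock name and FIFO mask below are assumptions, not taken from the patch:

    #include <linux/types.h>
    #include <plat/s3c64xx-spi.h>

    /* Illustrative platform data: the divider is handled by the CMU. */
    static struct s3c64xx_spi_info example_spi0_pdata = {
        .src_clk_nr    = 0,
        .src_clk_name  = "spi-bus",   /* assumed clock name */
        .clk_from_cmu  = true,
        .num_cs        = 1,
        .fifo_lvl_mask = 0x7f,        /* assumed FIFO level mask */
    };
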
diff --git a/arch/blackfin/include/asm/bfin5xx_spi.h b/arch/blackfin/include/asm/bfin5xx_spi.h
index ed4f8c6db0cd..4223cf08ce83 100644
--- a/arch/blackfin/include/asm/bfin5xx_spi.h
+++ b/arch/blackfin/include/asm/bfin5xx_spi.h
@@ -11,26 +11,17 @@
11 11
12#define MIN_SPI_BAUD_VAL 2 12#define MIN_SPI_BAUD_VAL 2
13 13
14#define SPI_READ 0
15#define SPI_WRITE 1
16
17#define SPI_CTRL_OFF 0x0
18#define SPI_FLAG_OFF 0x4
19#define SPI_STAT_OFF 0x8
20#define SPI_TXBUFF_OFF 0xc
21#define SPI_RXBUFF_OFF 0x10
22#define SPI_BAUD_OFF 0x14
23#define SPI_SHAW_OFF 0x18
24
25
26#define BIT_CTL_ENABLE 0x4000 14#define BIT_CTL_ENABLE 0x4000
27#define BIT_CTL_OPENDRAIN 0x2000 15#define BIT_CTL_OPENDRAIN 0x2000
28#define BIT_CTL_MASTER 0x1000 16#define BIT_CTL_MASTER 0x1000
29#define BIT_CTL_POLAR 0x0800 17#define BIT_CTL_CPOL 0x0800
30#define BIT_CTL_PHASE 0x0400 18#define BIT_CTL_CPHA 0x0400
31#define BIT_CTL_BITORDER 0x0200 19#define BIT_CTL_LSBF 0x0200
32#define BIT_CTL_WORDSIZE 0x0100 20#define BIT_CTL_WORDSIZE 0x0100
33#define BIT_CTL_MISOENABLE 0x0020 21#define BIT_CTL_EMISO 0x0020
22#define BIT_CTL_PSSE 0x0010
23#define BIT_CTL_GM 0x0008
24#define BIT_CTL_SZ 0x0004
34#define BIT_CTL_RXMOD 0x0000 25#define BIT_CTL_RXMOD 0x0000
35#define BIT_CTL_TXMOD 0x0001 26#define BIT_CTL_TXMOD 0x0001
36#define BIT_CTL_TIMOD_DMA_TX 0x0003 27#define BIT_CTL_TIMOD_DMA_TX 0x0003
@@ -50,61 +41,7 @@
50#define BIT_STU_SENDOVER 0x0001 41#define BIT_STU_SENDOVER 0x0001
51#define BIT_STU_RECVFULL 0x0020 42#define BIT_STU_RECVFULL 0x0020
52 43
53#define CFG_SPI_ENABLE 1 44#define MAX_CTRL_CS 8 /* cs in spi controller */
54#define CFG_SPI_DISABLE 0
55
56#define CFG_SPI_OUTENABLE 1
57#define CFG_SPI_OUTDISABLE 0
58
59#define CFG_SPI_ACTLOW 1
60#define CFG_SPI_ACTHIGH 0
61
62#define CFG_SPI_PHASESTART 1
63#define CFG_SPI_PHASEMID 0
64
65#define CFG_SPI_MASTER 1
66#define CFG_SPI_SLAVE 0
67
68#define CFG_SPI_SENELAST 0
69#define CFG_SPI_SENDZERO 1
70
71#define CFG_SPI_RCVFLUSH 1
72#define CFG_SPI_RCVDISCARD 0
73
74#define CFG_SPI_LSBFIRST 1
75#define CFG_SPI_MSBFIRST 0
76
77#define CFG_SPI_WORDSIZE16 1
78#define CFG_SPI_WORDSIZE8 0
79
80#define CFG_SPI_MISOENABLE 1
81#define CFG_SPI_MISODISABLE 0
82
83#define CFG_SPI_READ 0x00
84#define CFG_SPI_WRITE 0x01
85#define CFG_SPI_DMAREAD 0x02
86#define CFG_SPI_DMAWRITE 0x03
87
88#define CFG_SPI_CSCLEARALL 0
89#define CFG_SPI_CHIPSEL1 1
90#define CFG_SPI_CHIPSEL2 2
91#define CFG_SPI_CHIPSEL3 3
92#define CFG_SPI_CHIPSEL4 4
93#define CFG_SPI_CHIPSEL5 5
94#define CFG_SPI_CHIPSEL6 6
95#define CFG_SPI_CHIPSEL7 7
96
97#define CFG_SPI_CS1VALUE 1
98#define CFG_SPI_CS2VALUE 2
99#define CFG_SPI_CS3VALUE 3
100#define CFG_SPI_CS4VALUE 4
101#define CFG_SPI_CS5VALUE 5
102#define CFG_SPI_CS6VALUE 6
103#define CFG_SPI_CS7VALUE 7
104
105#define CMD_SPI_SET_BAUDRATE 2
106#define CMD_SPI_GET_SYSTEMCLOCK 25
107#define CMD_SPI_SET_WRITECONTINUOUS 26
108 45
109/* device.platform_data for SSP controller devices */ 46/* device.platform_data for SSP controller devices */
110struct bfin5xx_spi_master { 47struct bfin5xx_spi_master {
@@ -120,9 +57,7 @@ struct bfin5xx_spi_chip {
120 u16 ctl_reg; 57 u16 ctl_reg;
121 u8 enable_dma; 58 u8 enable_dma;
122 u8 bits_per_word; 59 u8 bits_per_word;
123 u8 cs_change_per_word;
124 u16 cs_chg_udelay; /* Some devices require 16-bit delays */ 60 u16 cs_chg_udelay; /* Some devices require 16-bit delays */
125 u32 cs_gpio;
126 /* Value to send if no TX value is supplied, usually 0x0 or 0xFFFF */ 61 /* Value to send if no TX value is supplied, usually 0x0 or 0xFFFF */
127 u16 idle_tx_val; 62 u16 idle_tx_val;
128 u8 pio_interrupt; /* Enable spi data irq */ 63 u8 pio_interrupt; /* Enable spi data irq */
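
With the CFG_SPI_* helpers removed, board code composes ctl_reg directly from the renamed BIT_CTL_* flags. An illustrative per-slave configuration (the flag combination is an example only):

    #include <asm/bfin5xx_spi.h>

    /* Illustrative slave setup using the renamed control-register bits. */
    static struct bfin5xx_spi_chip example_chip = {
        .ctl_reg       = BIT_CTL_CPOL | BIT_CTL_CPHA,  /* SPI mode 3 */
        .enable_dma    = 0,
        .bits_per_word = 8,
        .cs_chg_udelay = 0,
    };
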
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 16399bd24993..0f2417df6323 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
7 default y 7 default y
8 select HAVE_IDE 8 select HAVE_IDE
9 select HAVE_ARCH_TRACEHOOK 9 select HAVE_ARCH_TRACEHOOK
10 select HAVE_IRQ_WORK
10 select HAVE_PERF_EVENTS 11 select HAVE_PERF_EVENTS
11 12
12config ZONE_DMA 13config ZONE_DMA
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index f4709756d0d9..4ff2fb1e6b16 100644
--- a/arch/frv/lib/Makefile
+++ b/arch/frv/lib/Makefile
@@ -5,4 +5,4 @@
5lib-y := \ 5lib-y := \
6 __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ 6 __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
7 checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ 7 checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
8 outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o 8 outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c
deleted file mode 100644
index 9ac5acfd2e91..000000000000
--- a/arch/frv/lib/perf_event.c
+++ /dev/null
@@ -1,19 +0,0 @@
1/* Performance event handling
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/perf_event.h>
13
14/*
15 * mark the performance event as pending
16 */
17void set_perf_event_pending(void)
18{
19}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index ba22849ee3ec..7c82fa1fc911 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -53,6 +53,9 @@ config MMU
53 bool 53 bool
54 default y 54 default y
55 55
56config ARCH_DMA_ADDR_T_64BIT
57 def_bool y
58
56config NEED_DMA_MAP_STATE 59config NEED_DMA_MAP_STATE
57 def_bool y 60 def_bool y
58 61
@@ -62,6 +65,9 @@ config NEED_SG_DMA_LENGTH
62config SWIOTLB 65config SWIOTLB
63 bool 66 bool
64 67
68config STACKTRACE_SUPPORT
69 def_bool y
70
65config GENERIC_LOCKBREAK 71config GENERIC_LOCKBREAK
66 def_bool n 72 def_bool n
67 73
@@ -683,8 +689,10 @@ source "lib/Kconfig"
683# Use the generic interrupt handling code in kernel/irq/: 689# Use the generic interrupt handling code in kernel/irq/:
684# 690#
685config GENERIC_HARDIRQS 691config GENERIC_HARDIRQS
686 bool 692 def_bool y
687 default y 693
694config GENERIC_HARDIRQS_NO__DO_IRQ
695 def_bool y
688 696
689config GENERIC_IRQ_PROBE 697config GENERIC_IRQ_PROBE
690 bool 698 bool
diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h
deleted file mode 100644
index 9301a2821615..000000000000
--- a/arch/ia64/include/asm/compat.h
+++ /dev/null
@@ -1,208 +0,0 @@
1#ifndef _ASM_IA64_COMPAT_H
2#define _ASM_IA64_COMPAT_H
3/*
4 * Architecture specific compatibility types
5 */
6#include <linux/types.h>
7
8#define COMPAT_USER_HZ 100
9#define COMPAT_UTS_MACHINE "i686\0\0\0"
10
11typedef u32 compat_size_t;
12typedef s32 compat_ssize_t;
13typedef s32 compat_time_t;
14typedef s32 compat_clock_t;
15typedef s32 compat_key_t;
16typedef s32 compat_pid_t;
17typedef u16 __compat_uid_t;
18typedef u16 __compat_gid_t;
19typedef u32 __compat_uid32_t;
20typedef u32 __compat_gid32_t;
21typedef u16 compat_mode_t;
22typedef u32 compat_ino_t;
23typedef u16 compat_dev_t;
24typedef s32 compat_off_t;
25typedef s64 compat_loff_t;
26typedef u16 compat_nlink_t;
27typedef u16 compat_ipc_pid_t;
28typedef s32 compat_daddr_t;
29typedef u32 compat_caddr_t;
30typedef __kernel_fsid_t compat_fsid_t;
31typedef s32 compat_timer_t;
32
33typedef s32 compat_int_t;
34typedef s32 compat_long_t;
35typedef s64 __attribute__((aligned(4))) compat_s64;
36typedef u32 compat_uint_t;
37typedef u32 compat_ulong_t;
38typedef u64 __attribute__((aligned(4))) compat_u64;
39
40struct compat_timespec {
41 compat_time_t tv_sec;
42 s32 tv_nsec;
43};
44
45struct compat_timeval {
46 compat_time_t tv_sec;
47 s32 tv_usec;
48};
49
50struct compat_stat {
51 compat_dev_t st_dev;
52 u16 __pad1;
53 compat_ino_t st_ino;
54 compat_mode_t st_mode;
55 compat_nlink_t st_nlink;
56 __compat_uid_t st_uid;
57 __compat_gid_t st_gid;
58 compat_dev_t st_rdev;
59 u16 __pad2;
60 u32 st_size;
61 u32 st_blksize;
62 u32 st_blocks;
63 u32 st_atime;
64 u32 st_atime_nsec;
65 u32 st_mtime;
66 u32 st_mtime_nsec;
67 u32 st_ctime;
68 u32 st_ctime_nsec;
69 u32 __unused4;
70 u32 __unused5;
71};
72
73struct compat_flock {
74 short l_type;
75 short l_whence;
76 compat_off_t l_start;
77 compat_off_t l_len;
78 compat_pid_t l_pid;
79};
80
81#define F_GETLK64 12
82#define F_SETLK64 13
83#define F_SETLKW64 14
84
85/*
86 * IA32 uses 4 byte alignment for 64 bit quantities,
87 * so we need to pack this structure.
88 */
89struct compat_flock64 {
90 short l_type;
91 short l_whence;
92 compat_loff_t l_start;
93 compat_loff_t l_len;
94 compat_pid_t l_pid;
95} __attribute__((packed));
96
97struct compat_statfs {
98 int f_type;
99 int f_bsize;
100 int f_blocks;
101 int f_bfree;
102 int f_bavail;
103 int f_files;
104 int f_ffree;
105 compat_fsid_t f_fsid;
106 int f_namelen; /* SunOS ignores this field. */
107 int f_frsize;
108 int f_spare[5];
109};
110
111#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff
112#define COMPAT_RLIM_INFINITY 0xffffffff
113
114typedef u32 compat_old_sigset_t; /* at least 32 bits */
115
116#define _COMPAT_NSIG 64
117#define _COMPAT_NSIG_BPW 32
118
119typedef u32 compat_sigset_word;
120
121#define COMPAT_OFF_T_MAX 0x7fffffff
122#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
123
124struct compat_ipc64_perm {
125 compat_key_t key;
126 __compat_uid32_t uid;
127 __compat_gid32_t gid;
128 __compat_uid32_t cuid;
129 __compat_gid32_t cgid;
130 unsigned short mode;
131 unsigned short __pad1;
132 unsigned short seq;
133 unsigned short __pad2;
134 compat_ulong_t unused1;
135 compat_ulong_t unused2;
136};
137
138struct compat_semid64_ds {
139 struct compat_ipc64_perm sem_perm;
140 compat_time_t sem_otime;
141 compat_ulong_t __unused1;
142 compat_time_t sem_ctime;
143 compat_ulong_t __unused2;
144 compat_ulong_t sem_nsems;
145 compat_ulong_t __unused3;
146 compat_ulong_t __unused4;
147};
148
149struct compat_msqid64_ds {
150 struct compat_ipc64_perm msg_perm;
151 compat_time_t msg_stime;
152 compat_ulong_t __unused1;
153 compat_time_t msg_rtime;
154 compat_ulong_t __unused2;
155 compat_time_t msg_ctime;
156 compat_ulong_t __unused3;
157 compat_ulong_t msg_cbytes;
158 compat_ulong_t msg_qnum;
159 compat_ulong_t msg_qbytes;
160 compat_pid_t msg_lspid;
161 compat_pid_t msg_lrpid;
162 compat_ulong_t __unused4;
163 compat_ulong_t __unused5;
164};
165
166struct compat_shmid64_ds {
167 struct compat_ipc64_perm shm_perm;
168 compat_size_t shm_segsz;
169 compat_time_t shm_atime;
170 compat_ulong_t __unused1;
171 compat_time_t shm_dtime;
172 compat_ulong_t __unused2;
173 compat_time_t shm_ctime;
174 compat_ulong_t __unused3;
175 compat_pid_t shm_cpid;
176 compat_pid_t shm_lpid;
177 compat_ulong_t shm_nattch;
178 compat_ulong_t __unused4;
179 compat_ulong_t __unused5;
180};
181
182/*
183 * A pointer passed in from user mode. This should not be used for syscall parameters,
184 * just declare them as pointers because the syscall entry code will have appropriately
185 * converted them already.
186 */
187typedef u32 compat_uptr_t;
188
189static inline void __user *
190compat_ptr (compat_uptr_t uptr)
191{
192 return (void __user *) (unsigned long) uptr;
193}
194
195static inline compat_uptr_t
196ptr_to_compat(void __user *uptr)
197{
198 return (u32)(unsigned long)uptr;
199}
200
201static __inline__ void __user *
202arch_compat_alloc_user_space (long len)
203{
204 struct pt_regs *regs = task_pt_regs(current);
205 return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len);
206}
207
208#endif /* _ASM_IA64_COMPAT_H */
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index d514cd9edb49..8fb7d33a661f 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -6,12 +6,6 @@
6 * David Mosberger-Tang <davidm@hpl.hp.com> 6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 */ 7 */
8 8
9
10#include <linux/threads.h>
11#include <linux/irq.h>
12
13#include <asm/processor.h>
14
15/* 9/*
16 * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. 10 * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure.
17 */ 11 */
@@ -20,6 +14,11 @@
20 14
21#define local_softirq_pending() (local_cpu_data->softirq_pending) 15#define local_softirq_pending() (local_cpu_data->softirq_pending)
22 16
17#include <linux/threads.h>
18#include <linux/irq.h>
19
20#include <asm/processor.h>
21
23extern void __iomem *ipi_base_addr; 22extern void __iomem *ipi_base_addr;
24 23
25void ack_bad_irq(unsigned int irq); 24void ack_bad_irq(unsigned int irq);
diff --git a/arch/ia64/include/asm/iommu_table.h b/arch/ia64/include/asm/iommu_table.h
new file mode 100644
index 000000000000..92c8d36ae5ae
--- /dev/null
+++ b/arch/ia64/include/asm/iommu_table.h
@@ -0,0 +1,6 @@
1#ifndef _ASM_IA64_IOMMU_TABLE_H
2#define _ASM_IA64_IOMMU_TABLE_H
3
4#define IOMMU_INIT_POST(_detect)
5
6#endif /* _ASM_IA64_IOMMU_TABLE_H */
diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h
index 2feb7f64c035..6cca30705d50 100644
--- a/arch/ia64/include/asm/system.h
+++ b/arch/ia64/include/asm/system.h
@@ -196,10 +196,6 @@ void cpu_idle_wait(void);
196 196
197void default_idle(void); 197void default_idle(void);
198 198
199#ifdef CONFIG_VIRT_CPU_ACCOUNTING
200extern void account_system_vtime(struct task_struct *);
201#endif
202
203#endif /* __KERNEL__ */ 199#endif /* __KERNEL__ */
204 200
205#endif /* __ASSEMBLY__ */ 201#endif /* __ASSEMBLY__ */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index db10b1e378b0..395c2f216dd8 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_AUDIT) += audit.o
34obj-$(CONFIG_PCI_MSI) += msi_ia64.o 34obj-$(CONFIG_PCI_MSI) += msi_ia64.o
35mca_recovery-y += mca_drv.o mca_drv_asm.o 35mca_recovery-y += mca_drv.o mca_drv_asm.o
36obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o 36obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
37obj-$(CONFIG_STACKTRACE) += stacktrace.o
37 38
38obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \ 39obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \
39 paravirt_patch.o 40 paravirt_patch.o
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 71e35864d2e2..d52f1f78eff2 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -59,13 +59,13 @@ int __init init_cyclone_clock(void)
59 return -ENODEV; 59 return -ENODEV;
60 } 60 }
61 base = readq(reg); 61 base = readq(reg);
62 iounmap(reg);
62 if(!base){ 63 if(!base){
63 printk(KERN_ERR "Summit chipset: Could not find valid CBAR" 64 printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
64 " value.\n"); 65 " value.\n");
65 use_cyclone = 0; 66 use_cyclone = 0;
66 return -ENODEV; 67 return -ENODEV;
67 } 68 }
68 iounmap(reg);
69 69
70 /* setup PMCC */ 70 /* setup PMCC */
71 offset = (base + CYCLONE_PMCC_OFFSET); 71 offset = (base + CYCLONE_PMCC_OFFSET);
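
The cyclone.c change above unmaps the temporary CBAR mapping as soon as the value has been read, so the early-return error path no longer leaks it. A generic sketch of that pattern, with hypothetical names:

    #include <linux/types.h>
    #include <linux/io.h>

    /* Illustrative pattern: drop the temporary mapping right after the read,
     * so every error path below it is leak-free. */
    static int example_read_cbar(unsigned long phys, u64 *out)
    {
        void __iomem *reg = ioremap_nocache(phys, sizeof(u64));

        if (!reg)
            return -ENOMEM;
        *out = readq(reg);
        iounmap(reg);        /* not needed any more, even if *out is invalid */
        return *out ? 0 : -ENODEV;
    }
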
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 7ded76658d2d..22c38404f539 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -108,10 +108,6 @@
108#define DBG(fmt...) 108#define DBG(fmt...)
109#endif 109#endif
110 110
111#define NR_PREALLOCATE_RTE_ENTRIES \
112 (PAGE_SIZE / sizeof(struct iosapic_rte_info))
113#define RTE_PREALLOCATED (1)
114
115static DEFINE_SPINLOCK(iosapic_lock); 111static DEFINE_SPINLOCK(iosapic_lock);
116 112
117/* 113/*
@@ -136,7 +132,6 @@ struct iosapic_rte_info {
136 struct list_head rte_list; /* RTEs sharing the same vector */ 132 struct list_head rte_list; /* RTEs sharing the same vector */
137 char rte_index; /* IOSAPIC RTE index */ 133 char rte_index; /* IOSAPIC RTE index */
138 int refcnt; /* reference counter */ 134 int refcnt; /* reference counter */
139 unsigned int flags; /* flags */
140 struct iosapic *iosapic; 135 struct iosapic *iosapic;
141} ____cacheline_aligned; 136} ____cacheline_aligned;
142 137
@@ -155,9 +150,6 @@ static struct iosapic_intr_info {
155 150
156static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */ 151static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */
157 152
158static int iosapic_kmalloc_ok;
159static LIST_HEAD(free_rte_list);
160
161static inline void 153static inline void
162iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) 154iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val)
163{ 155{
@@ -394,7 +386,7 @@ iosapic_startup_level_irq (unsigned int irq)
394} 386}
395 387
396static void 388static void
397iosapic_end_level_irq (unsigned int irq) 389iosapic_unmask_level_irq (unsigned int irq)
398{ 390{
399 ia64_vector vec = irq_to_vector(irq); 391 ia64_vector vec = irq_to_vector(irq);
400 struct iosapic_rte_info *rte; 392 struct iosapic_rte_info *rte;
@@ -404,7 +396,8 @@ iosapic_end_level_irq (unsigned int irq)
404 if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { 396 if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
405 do_unmask_irq = 1; 397 do_unmask_irq = 1;
406 mask_irq(irq); 398 mask_irq(irq);
407 } 399 } else
400 unmask_irq(irq);
408 401
409 list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) 402 list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
410 iosapic_eoi(rte->iosapic->addr, vec); 403 iosapic_eoi(rte->iosapic->addr, vec);
@@ -427,9 +420,8 @@ static struct irq_chip irq_type_iosapic_level = {
427 .enable = iosapic_enable_level_irq, 420 .enable = iosapic_enable_level_irq,
428 .disable = iosapic_disable_level_irq, 421 .disable = iosapic_disable_level_irq,
429 .ack = iosapic_ack_level_irq, 422 .ack = iosapic_ack_level_irq,
430 .end = iosapic_end_level_irq,
431 .mask = mask_irq, 423 .mask = mask_irq,
432 .unmask = unmask_irq, 424 .unmask = iosapic_unmask_level_irq,
433 .set_affinity = iosapic_set_affinity 425 .set_affinity = iosapic_set_affinity
434}; 426};
435 427
@@ -552,37 +544,6 @@ iosapic_reassign_vector (int irq)
552 } 544 }
553} 545}
554 546
555static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void)
556{
557 int i;
558 struct iosapic_rte_info *rte;
559 int preallocated = 0;
560
561 if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
562 rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
563 NR_PREALLOCATE_RTE_ENTRIES);
564 for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
565 list_add(&rte->rte_list, &free_rte_list);
566 }
567
568 if (!list_empty(&free_rte_list)) {
569 rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
570 rte_list);
571 list_del(&rte->rte_list);
572 preallocated++;
573 } else {
574 rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
575 if (!rte)
576 return NULL;
577 }
578
579 memset(rte, 0, sizeof(struct iosapic_rte_info));
580 if (preallocated)
581 rte->flags |= RTE_PREALLOCATED;
582
583 return rte;
584}
585
586static inline int irq_is_shared (int irq) 547static inline int irq_is_shared (int irq)
587{ 548{
588 return (iosapic_intr_info[irq].count > 1); 549 return (iosapic_intr_info[irq].count > 1);
@@ -615,7 +576,7 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
615 576
616 rte = find_rte(irq, gsi); 577 rte = find_rte(irq, gsi);
617 if (!rte) { 578 if (!rte) {
618 rte = iosapic_alloc_rte(); 579 rte = kzalloc(sizeof (*rte), GFP_ATOMIC);
619 if (!rte) { 580 if (!rte) {
620 printk(KERN_WARNING "%s: cannot allocate memory\n", 581 printk(KERN_WARNING "%s: cannot allocate memory\n",
621 __func__); 582 __func__);
@@ -658,6 +619,10 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
658 idesc->chip->name, irq_type->name); 619 idesc->chip->name, irq_type->name);
659 idesc->chip = irq_type; 620 idesc->chip = irq_type;
660 } 621 }
622 if (trigger == IOSAPIC_EDGE)
623 __set_irq_handler_unlocked(irq, handle_edge_irq);
624 else
625 __set_irq_handler_unlocked(irq, handle_level_irq);
661 return 0; 626 return 0;
662} 627}
663 628
@@ -1161,10 +1126,3 @@ map_iosapic_to_node(unsigned int gsi_base, int node)
1161 return; 1126 return;
1162} 1127}
1163#endif 1128#endif
1164
1165static int __init iosapic_enable_kmalloc (void)
1166{
1167 iosapic_kmalloc_ok = 1;
1168 return 0;
1169}
1170core_initcall (iosapic_enable_kmalloc);
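
Two things change in the iosapic hunk above: RTEs come straight from kzalloc(GFP_ATOMIC) instead of a boot-time pool, and the generic flow handler is now selected to match the trigger type. A minimal sketch of the handler-selection pattern:

    #include <linux/irq.h>

    /* Illustrative: pick the generic flow handler that matches the trigger. */
    static void example_set_flow_handler(unsigned int irq, int edge_triggered)
    {
        if (edge_triggered)
            __set_irq_handler_unlocked(irq, handle_edge_irq);
        else
            __set_irq_handler_unlocked(irq, handle_level_irq);
    }
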
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index f14c35f9b03a..9a26015c3e50 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -30,6 +30,7 @@
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31#include <linux/irq.h> 31#include <linux/irq.h>
32#include <linux/ratelimit.h> 32#include <linux/ratelimit.h>
33#include <linux/acpi.h>
33 34
34#include <asm/delay.h> 35#include <asm/delay.h>
35#include <asm/intrinsics.h> 36#include <asm/intrinsics.h>
@@ -635,6 +636,7 @@ ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
635 desc->chip = &irq_type_ia64_lsapic; 636 desc->chip = &irq_type_ia64_lsapic;
636 if (action) 637 if (action)
637 setup_irq(irq, action); 638 setup_irq(irq, action);
639 set_irq_handler(irq, handle_percpu_irq);
638} 640}
639 641
640void __init 642void __init
@@ -650,6 +652,9 @@ ia64_native_register_ipi(void)
650void __init 652void __init
651init_IRQ (void) 653init_IRQ (void)
652{ 654{
655#ifdef CONFIG_ACPI
656 acpi_boot_init();
657#endif
653 ia64_register_ipi(); 658 ia64_register_ipi();
654 register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); 659 register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
655#ifdef CONFIG_SMP 660#ifdef CONFIG_SMP
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index a0220dc5ff42..1753f6a30d55 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -2055,25 +2055,6 @@ ia64_mca_init(void)
2055 2055
2056 IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__); 2056 IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__);
2057 2057
2058 /*
2059 * Configure the CMCI/P vector and handler. Interrupts for CMC are
2060 * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
2061 */
2062 register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
2063 register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
2064 ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */
2065
2066 /* Setup the MCA rendezvous interrupt vector */
2067 register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);
2068
2069 /* Setup the MCA wakeup interrupt vector */
2070 register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
2071
2072#ifdef CONFIG_ACPI
2073 /* Setup the CPEI/P handler */
2074 register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
2075#endif
2076
2077 /* Initialize the areas set aside by the OS to buffer the 2058 /* Initialize the areas set aside by the OS to buffer the
2078 * platform/processor error states for MCA/INIT/CMC 2059 * platform/processor error states for MCA/INIT/CMC
2079 * handling. 2060 * handling.
@@ -2103,6 +2084,25 @@ ia64_mca_late_init(void)
2103 if (!mca_init) 2084 if (!mca_init)
2104 return 0; 2085 return 0;
2105 2086
2087 /*
2088 * Configure the CMCI/P vector and handler. Interrupts for CMC are
2089 * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
2090 */
2091 register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
2092 register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
2093 ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */
2094
2095 /* Setup the MCA rendezvous interrupt vector */
2096 register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);
2097
2098 /* Setup the MCA wakeup interrupt vector */
2099 register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
2100
2101#ifdef CONFIG_ACPI
2102 /* Setup the CPEI/P handler */
2103 register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
2104#endif
2105
2106 register_hotcpu_notifier(&mca_cpu_notifier); 2106 register_hotcpu_notifier(&mca_cpu_notifier);
2107 2107
2108 /* Setup the CMCI/P vector and handler */ 2108 /* Setup the CMCI/P vector and handler */
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 4a746ea838ff..00b19a416eab 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -104,8 +104,8 @@ static int ia64_msi_retrigger_irq(unsigned int irq)
104 */ 104 */
105static struct irq_chip ia64_msi_chip = { 105static struct irq_chip ia64_msi_chip = {
106 .name = "PCI-MSI", 106 .name = "PCI-MSI",
107 .mask = mask_msi_irq, 107 .irq_mask = mask_msi_irq,
108 .unmask = unmask_msi_irq, 108 .irq_unmask = unmask_msi_irq,
109 .ack = ia64_ack_msi_irq, 109 .ack = ia64_ack_msi_irq,
110#ifdef CONFIG_SMP 110#ifdef CONFIG_SMP
111 .set_affinity = ia64_set_msi_irq_affinity, 111 .set_affinity = ia64_set_msi_irq_affinity,
@@ -160,8 +160,8 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
160 160
161static struct irq_chip dmar_msi_type = { 161static struct irq_chip dmar_msi_type = {
162 .name = "DMAR_MSI", 162 .name = "DMAR_MSI",
163 .unmask = dmar_msi_unmask, 163 .irq_unmask = dmar_msi_unmask,
164 .mask = dmar_msi_mask, 164 .irq_mask = dmar_msi_mask,
165 .ack = ia64_ack_msi_irq, 165 .ack = ia64_ack_msi_irq,
166#ifdef CONFIG_SMP 166#ifdef CONFIG_SMP
167 .set_affinity = dmar_msi_set_affinity, 167 .set_affinity = dmar_msi_set_affinity,
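
The .mask/.unmask to .irq_mask/.irq_unmask renames track the genirq conversion to struct irq_data based callbacks. A hedged sketch of an irq_chip written in the new style, with placeholder hardware accessors:

    #include <linux/irq.h>

    /* Illustrative new-style callbacks: they take struct irq_data, not an irq number. */
    static void example_irq_mask(struct irq_data *data)
    {
        /* mask data->irq in the hardware here */
    }

    static void example_irq_unmask(struct irq_data *data)
    {
        /* unmask data->irq in the hardware here */
    }

    static struct irq_chip example_chip = {
        .name       = "EXAMPLE",
        .irq_mask   = example_irq_mask,
        .irq_unmask = example_irq_unmask,
    };
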
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index fdf6f9d013e5..77597e5ea60a 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -434,7 +434,7 @@ register_info(char *page)
434 unsigned long phys_stacked; 434 unsigned long phys_stacked;
435 pal_hints_u_t hints; 435 pal_hints_u_t hints;
436 unsigned long iregs, dregs; 436 unsigned long iregs, dregs;
437 char *info_type[]={ 437 static const char * const info_type[] = {
438 "Implemented AR(s)", 438 "Implemented AR(s)",
439 "AR(s) with read side-effects", 439 "AR(s) with read side-effects",
440 "Implemented CR(s)", 440 "Implemented CR(s)",
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index cce050e85c73..6b1852f7f972 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1573,7 +1573,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
1573 return -EINVAL; 1573 return -EINVAL;
1574 } 1574 }
1575 1575
1576 ctx = (pfm_context_t *)filp->private_data; 1576 ctx = filp->private_data;
1577 if (ctx == NULL) { 1577 if (ctx == NULL) {
1578 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current)); 1578 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
1579 return -EINVAL; 1579 return -EINVAL;
@@ -1673,7 +1673,7 @@ pfm_poll(struct file *filp, poll_table * wait)
1673 return 0; 1673 return 0;
1674 } 1674 }
1675 1675
1676 ctx = (pfm_context_t *)filp->private_data; 1676 ctx = filp->private_data;
1677 if (ctx == NULL) { 1677 if (ctx == NULL) {
1678 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current)); 1678 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
1679 return 0; 1679 return 0;
@@ -1733,7 +1733,7 @@ pfm_fasync(int fd, struct file *filp, int on)
1733 return -EBADF; 1733 return -EBADF;
1734 } 1734 }
1735 1735
1736 ctx = (pfm_context_t *)filp->private_data; 1736 ctx = filp->private_data;
1737 if (ctx == NULL) { 1737 if (ctx == NULL) {
1738 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current)); 1738 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
1739 return -EBADF; 1739 return -EBADF;
@@ -1841,7 +1841,7 @@ pfm_flush(struct file *filp, fl_owner_t id)
1841 return -EBADF; 1841 return -EBADF;
1842 } 1842 }
1843 1843
1844 ctx = (pfm_context_t *)filp->private_data; 1844 ctx = filp->private_data;
1845 if (ctx == NULL) { 1845 if (ctx == NULL) {
1846 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current)); 1846 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
1847 return -EBADF; 1847 return -EBADF;
@@ -1984,7 +1984,7 @@ pfm_close(struct inode *inode, struct file *filp)
1984 return -EBADF; 1984 return -EBADF;
1985 } 1985 }
1986 1986
1987 ctx = (pfm_context_t *)filp->private_data; 1987 ctx = filp->private_data;
1988 if (ctx == NULL) { 1988 if (ctx == NULL) {
1989 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current)); 1989 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
1990 return -EBADF; 1990 return -EBADF;
@@ -4907,7 +4907,7 @@ restart_args:
4907 goto error_args; 4907 goto error_args;
4908 } 4908 }
4909 4909
4910 ctx = (pfm_context_t *)file->private_data; 4910 ctx = file->private_data;
4911 if (unlikely(ctx == NULL)) { 4911 if (unlikely(ctx == NULL)) {
4912 DPRINT(("no context for fd %d\n", fd)); 4912 DPRINT(("no context for fd %d\n", fd));
4913 goto error_args; 4913 goto error_args;
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index aa8b5fa1a8de..45d7543b69cc 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -642,7 +642,7 @@ salinfo_init(void)
642 for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { 642 for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
643 data = salinfo_data + i; 643 data = salinfo_data + i;
644 data->type = i; 644 data->type = i;
645 init_MUTEX(&data->mutex); 645 sema_init(&data->mutex, 1);
646 dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); 646 dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
647 if (!dir) 647 if (!dir)
648 continue; 648 continue;
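
init_MUTEX() is on its way out; a semaphore used as a mutex is now initialised explicitly with a count of one. A minimal illustration:

    #include <linux/semaphore.h>

    static struct semaphore example_sem;

    /* What used to be init_MUTEX(&example_sem) becomes an explicit count of 1. */
    static void example_init(void)
    {
        sema_init(&example_sem, 1);   /* binary semaphore, initially available */
    }
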
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 8fb958abf8d0..911cf9749700 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -594,10 +594,6 @@ setup_arch (char **cmdline_p)
594 cpu_init(); /* initialize the bootstrap CPU */ 594 cpu_init(); /* initialize the bootstrap CPU */
595 mmu_context_init(); /* initialize context_id bitmap */ 595 mmu_context_init(); /* initialize context_id bitmap */
596 596
597#ifdef CONFIG_ACPI
598 acpi_boot_init();
599#endif
600
601 paravirt_banner(); 597 paravirt_banner();
602 paravirt_arch_setup_console(cmdline_p); 598 paravirt_arch_setup_console(cmdline_p);
603 599
diff --git a/arch/ia64/kernel/stacktrace.c b/arch/ia64/kernel/stacktrace.c
new file mode 100644
index 000000000000..5af2783a87f4
--- /dev/null
+++ b/arch/ia64/kernel/stacktrace.c
@@ -0,0 +1,39 @@
1/*
2 * arch/ia64/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 */
7#include <linux/sched.h>
8#include <linux/stacktrace.h>
9#include <linux/module.h>
10
11static void
12ia64_do_save_stack(struct unw_frame_info *info, void *arg)
13{
14 struct stack_trace *trace = arg;
15 unsigned long ip;
16 int skip = trace->skip;
17
18 trace->nr_entries = 0;
19 do {
20 unw_get_ip(info, &ip);
21 if (ip == 0)
22 break;
23 if (skip == 0) {
24 trace->entries[trace->nr_entries++] = ip;
25 if (trace->nr_entries == trace->max_entries)
26 break;
27 } else
28 skip--;
29 } while (unw_unwind(info) >= 0);
30}
31
32/*
33 * Save stack-backtrace addresses into a stack_trace buffer.
34 */
35void save_stack_trace(struct stack_trace *trace)
36{
37 unw_init_running(ia64_do_save_stack, trace);
38}
39EXPORT_SYMBOL(save_stack_trace);
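
With save_stack_trace() in place (and STACKTRACE_SUPPORT selected in the ia64 Kconfig hunk earlier), generic users can capture backtraces on ia64. A hedged caller sketch using the standard stacktrace API:

    #include <linux/kernel.h>
    #include <linux/stacktrace.h>

    /* Illustrative caller: capture up to 16 return addresses from the current task. */
    static void example_dump_stack(void)
    {
        unsigned long entries[16];
        struct stack_trace trace = {
            .max_entries = ARRAY_SIZE(entries),
            .entries     = entries,
            .skip        = 0,
        };

        save_stack_trace(&trace);
        print_stack_trace(&trace, 0);   /* second argument is indentation */
    }
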
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index b6c0e63a0bf6..fed6afa2e8a9 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -1204,10 +1204,10 @@ desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word
1204static inline unw_hash_index_t 1204static inline unw_hash_index_t
1205hash (unsigned long ip) 1205hash (unsigned long ip)
1206{ 1206{
1207# define hashmagic 0x9e3779b97f4a7c16UL /* based on (sqrt(5)/2-1)*2^64 */ 1207 /* magic number = ((sqrt(5)-1)/2)*2^64 */
1208 static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL;
1208 1209
1209 return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE); 1210 return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE);
1210#undef hashmagic
1211} 1211}
1212 1212
1213static inline long 1213static inline long
@@ -1531,7 +1531,7 @@ build_script (struct unw_frame_info *info)
1531 struct unw_labeled_state *ls, *next; 1531 struct unw_labeled_state *ls, *next;
1532 unsigned long ip = info->ip; 1532 unsigned long ip = info->ip;
1533 struct unw_state_record sr; 1533 struct unw_state_record sr;
1534 struct unw_table *table; 1534 struct unw_table *table, *prev;
1535 struct unw_reg_info *r; 1535 struct unw_reg_info *r;
1536 struct unw_insn insn; 1536 struct unw_insn insn;
1537 u8 *dp, *desc_end; 1537 u8 *dp, *desc_end;
@@ -1560,11 +1560,26 @@ build_script (struct unw_frame_info *info)
1560 1560
1561 STAT(parse_start = ia64_get_itc()); 1561 STAT(parse_start = ia64_get_itc());
1562 1562
1563 prev = NULL;
1563 for (table = unw.tables; table; table = table->next) { 1564 for (table = unw.tables; table; table = table->next) {
1564 if (ip >= table->start && ip < table->end) { 1565 if (ip >= table->start && ip < table->end) {
1566 /*
1567 * Leave the kernel unwind table at the very front,
1568 * lest moving it breaks some assumption elsewhere.
1569 * Otherwise, move the matching table to the second
1570 * position in the list so that traversals can benefit
1571 * from commonality in backtrace paths.
1572 */
1573 if (prev && prev != unw.tables) {
1574 /* unw is safe - we're already spinlocked */
1575 prev->next = table->next;
1576 table->next = unw.tables->next;
1577 unw.tables->next = table;
1578 }
1565 e = lookup(table, ip - table->segment_base); 1579 e = lookup(table, ip - table->segment_base);
1566 break; 1580 break;
1567 } 1581 }
1582 prev = table;
1568 } 1583 }
1569 if (!e) { 1584 if (!e) {
1570 /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */ 1585 /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */
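Two things happen in the unwind.c hunks above: the hash multiplier becomes a typed constant (its value is the usual Fibonacci-hashing multiplier, roughly ((sqrt(5)-1)/2)*2^64), and a matching unwind table is promoted to the second slot of the list, behind the kernel table, so repeated backtraces through the same module find it sooner. A small freestanding sketch of the multiplicative hash, in plain userspace C, with an illustrative 8-bit table size standing in for UNW_LOG_HASH_SIZE:

	#include <stdint.h>
	#include <stdio.h>

	#define LOG_HASH_SIZE 8		/* illustrative; the kernel uses UNW_LOG_HASH_SIZE */

	static unsigned int hash_ip(uint64_t ip)
	{
		/* golden-ratio multiplier: ((sqrt(5)-1)/2) * 2^64, rounded */
		static const uint64_t hashmagic = 0x9e3779b97f4a7c16ULL;

		/* ia64 bundles are 16 bytes, so the low 4 bits carry no information */
		return (unsigned int)(((ip >> 4) * hashmagic) >> (64 - LOG_HASH_SIZE));
	}

	int main(void)
	{
		printf("%u\n", hash_ip(0xa000000100012340ULL));
		return 0;
	}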
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 0c72dd463831..a5e500f02853 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -228,8 +228,8 @@ static int sn_msi_retrigger_irq(unsigned int irq)
228 228
229static struct irq_chip sn_msi_chip = { 229static struct irq_chip sn_msi_chip = {
230 .name = "PCI-MSI", 230 .name = "PCI-MSI",
231 .mask = mask_msi_irq, 231 .irq_mask = mask_msi_irq,
232 .unmask = unmask_msi_irq, 232 .irq_unmask = unmask_msi_irq,
233 .ack = sn_ack_msi_irq, 233 .ack = sn_ack_msi_irq,
234#ifdef CONFIG_SMP 234#ifdef CONFIG_SMP
235 .set_affinity = sn_set_msi_irq_affinity, 235 .set_affinity = sn_set_msi_irq_affinity,
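The msi_sn.c hunk above is part of the irq_chip transition in which callbacks gain an irq_ prefix and receive a struct irq_data pointer instead of a bare interrupt number (only .mask/.unmask are converted in this hunk). A minimal sketch of the new-style callback shape, assuming the struct irq_data layout of this kernel generation; the names are invented for illustration:

	#include <linux/irq.h>

	static void demo_irq_mask(struct irq_data *d)
	{
		/* d->irq is the Linux interrupt number the old .mask(irq) received */
	}

	static struct irq_chip demo_chip = {
		.name		= "DEMO",
		.irq_mask	= demo_irq_mask,
		.irq_unmask	= demo_irq_mask,	/* placeholder for illustration */
	};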
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
index 8adc6a14272a..3e8d350fdf39 100644
--- a/arch/ia64/xen/xen_pv_ops.c
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -1136,7 +1136,6 @@ __initconst = {
1136static void __init 1136static void __init
1137xen_patch_branch(unsigned long tag, unsigned long type) 1137xen_patch_branch(unsigned long tag, unsigned long type)
1138{ 1138{
1139 const unsigned long nelem = 1139 __paravirt_patch_apply_branch(tag, type, xen_branch_target,
1140 sizeof(xen_branch_target) / sizeof(xen_branch_target[0]); 1140 ARRAY_SIZE(xen_branch_target));
1141 __paravirt_patch_apply_branch(tag, type, xen_branch_target, nelem);
1142} 1141}
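The xen_pv_ops.c hunk above replaces an open-coded sizeof division with ARRAY_SIZE(); the macro is equivalent to the expression below (the in-tree version also adds a compile-time check that its argument really is an array, omitted here):

	/* simplified form of the kernel helper; the real one adds __must_be_array() */
	#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

	static unsigned long demo_buf[8];	/* illustrative array */
	/* ARRAY_SIZE(demo_buf) evaluates to 8 at compile time */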
diff --git a/arch/m32r/include/asm/elf.h b/arch/m32r/include/asm/elf.h
index 2f85412ef730..b8da7d0574d2 100644
--- a/arch/m32r/include/asm/elf.h
+++ b/arch/m32r/include/asm/elf.h
@@ -82,9 +82,9 @@ typedef elf_fpreg_t elf_fpregset_t;
82 * These are used to set parameters in the core dumps. 82 * These are used to set parameters in the core dumps.
83 */ 83 */
84#define ELF_CLASS ELFCLASS32 84#define ELF_CLASS ELFCLASS32
85#if defined(__LITTLE_ENDIAN) 85#if defined(__LITTLE_ENDIAN__)
86#define ELF_DATA ELFDATA2LSB 86#define ELF_DATA ELFDATA2LSB
87#elif defined(__BIG_ENDIAN) 87#elif defined(__BIG_ENDIAN__)
88#define ELF_DATA ELFDATA2MSB 88#define ELF_DATA ELFDATA2MSB
89#else 89#else
90#error no endian defined 90#error no endian defined
diff --git a/arch/m32r/kernel/.gitignore b/arch/m32r/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/m32r/kernel/.gitignore
@@ -0,0 +1 @@
vmlinux.lds
diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c
index 3c71f776872c..7db26f1f082d 100644
--- a/arch/m32r/kernel/irq.c
+++ b/arch/m32r/kernel/irq.c
@@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v)
51 for_each_online_cpu(j) 51 for_each_online_cpu(j)
52 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 52 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
53#endif 53#endif
54 seq_printf(p, " %14s", irq_desc[i].chip->typename); 54 seq_printf(p, " %14s", irq_desc[i].chip->name);
55 seq_printf(p, " %s", action->name); 55 seq_printf(p, " %s", action->name);
56 56
57 for (action=action->next; action; action = action->next) 57 for (action=action->next; action; action = action->next)
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 7bbe38645ed5..a08697f0886d 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -28,6 +28,8 @@
28 28
29#define DEBUG_SIG 0 29#define DEBUG_SIG 0
30 30
31#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
32
31asmlinkage int 33asmlinkage int
32sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, 34sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
33 unsigned long r2, unsigned long r3, unsigned long r4, 35 unsigned long r2, unsigned long r3, unsigned long r4,
@@ -254,7 +256,7 @@ give_sigsegv:
254static int prev_insn(struct pt_regs *regs) 256static int prev_insn(struct pt_regs *regs)
255{ 257{
256 u16 inst; 258 u16 inst;
257 if (get_user(&inst, (u16 __user *)(regs->bpc - 2))) 259 if (get_user(inst, (u16 __user *)(regs->bpc - 2)))
258 return -EFAULT; 260 return -EFAULT;
259 if ((inst & 0xfff0) == 0x10f0) /* trap ? */ 261 if ((inst & 0xfff0) == 0x10f0) /* trap ? */
260 regs->bpc -= 2; 262 regs->bpc -= 2;
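The signal.c fix above matters because get_user() expects the destination variable itself as its first argument, not its address; passing &inst is wrong and, depending on the architecture's implementation, either fails to build or stores the wrong thing. A minimal sketch of the corrected usage, with the helper name invented for illustration:

	#include <linux/errno.h>
	#include <linux/types.h>
	#include <linux/uaccess.h>

	/* hypothetical helper: fetch the 16-bit instruction just before 'pc' */
	static int demo_read_prev_insn(unsigned long pc, u16 *out)
	{
		u16 inst;

		/* get_user(x, ptr): x is the plain variable, not a pointer to it */
		if (get_user(inst, (u16 __user *)(pc - 2)))
			return -EFAULT;

		*out = inst;
		return 0;
	}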
diff --git a/arch/m32r/platforms/m32104ut/setup.c b/arch/m32r/platforms/m32104ut/setup.c
index 922fdfdadeaa..402a59d7219b 100644
--- a/arch/m32r/platforms/m32104ut/setup.c
+++ b/arch/m32r/platforms/m32104ut/setup.c
@@ -65,7 +65,7 @@ static void shutdown_m32104ut_irq(unsigned int irq)
65 65
66static struct irq_chip m32104ut_irq_type = 66static struct irq_chip m32104ut_irq_type =
67{ 67{
68 .typename = "M32104UT-IRQ", 68 .name = "M32104UT-IRQ",
69 .startup = startup_m32104ut_irq, 69 .startup = startup_m32104ut_irq,
70 .shutdown = shutdown_m32104ut_irq, 70 .shutdown = shutdown_m32104ut_irq,
71 .enable = enable_m32104ut_irq, 71 .enable = enable_m32104ut_irq,
diff --git a/arch/m32r/platforms/m32700ut/setup.c b/arch/m32r/platforms/m32700ut/setup.c
index 9c1bc7487c1e..80b1a026795a 100644
--- a/arch/m32r/platforms/m32700ut/setup.c
+++ b/arch/m32r/platforms/m32700ut/setup.c
@@ -71,7 +71,7 @@ static void shutdown_m32700ut_irq(unsigned int irq)
71 71
72static struct irq_chip m32700ut_irq_type = 72static struct irq_chip m32700ut_irq_type =
73{ 73{
74 .typename = "M32700UT-IRQ", 74 .name = "M32700UT-IRQ",
75 .startup = startup_m32700ut_irq, 75 .startup = startup_m32700ut_irq,
76 .shutdown = shutdown_m32700ut_irq, 76 .shutdown = shutdown_m32700ut_irq,
77 .enable = enable_m32700ut_irq, 77 .enable = enable_m32700ut_irq,
@@ -148,7 +148,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq)
148 148
149static struct irq_chip m32700ut_pld_irq_type = 149static struct irq_chip m32700ut_pld_irq_type =
150{ 150{
151 .typename = "M32700UT-PLD-IRQ", 151 .name = "M32700UT-PLD-IRQ",
152 .startup = startup_m32700ut_pld_irq, 152 .startup = startup_m32700ut_pld_irq,
153 .shutdown = shutdown_m32700ut_pld_irq, 153 .shutdown = shutdown_m32700ut_pld_irq,
154 .enable = enable_m32700ut_pld_irq, 154 .enable = enable_m32700ut_pld_irq,
@@ -217,7 +217,7 @@ static void shutdown_m32700ut_lanpld_irq(unsigned int irq)
217 217
218static struct irq_chip m32700ut_lanpld_irq_type = 218static struct irq_chip m32700ut_lanpld_irq_type =
219{ 219{
220 .typename = "M32700UT-PLD-LAN-IRQ", 220 .name = "M32700UT-PLD-LAN-IRQ",
221 .startup = startup_m32700ut_lanpld_irq, 221 .startup = startup_m32700ut_lanpld_irq,
222 .shutdown = shutdown_m32700ut_lanpld_irq, 222 .shutdown = shutdown_m32700ut_lanpld_irq,
223 .enable = enable_m32700ut_lanpld_irq, 223 .enable = enable_m32700ut_lanpld_irq,
@@ -286,7 +286,7 @@ static void shutdown_m32700ut_lcdpld_irq(unsigned int irq)
286 286
287static struct irq_chip m32700ut_lcdpld_irq_type = 287static struct irq_chip m32700ut_lcdpld_irq_type =
288{ 288{
289 .typename = "M32700UT-PLD-LCD-IRQ", 289 .name = "M32700UT-PLD-LCD-IRQ",
290 .startup = startup_m32700ut_lcdpld_irq, 290 .startup = startup_m32700ut_lcdpld_irq,
291 .shutdown = shutdown_m32700ut_lcdpld_irq, 291 .shutdown = shutdown_m32700ut_lcdpld_irq,
292 .enable = enable_m32700ut_lcdpld_irq, 292 .enable = enable_m32700ut_lcdpld_irq,
diff --git a/arch/m32r/platforms/mappi/setup.c b/arch/m32r/platforms/mappi/setup.c
index fb4b17799b66..ea00c84d6b1b 100644
--- a/arch/m32r/platforms/mappi/setup.c
+++ b/arch/m32r/platforms/mappi/setup.c
@@ -65,7 +65,7 @@ static void shutdown_mappi_irq(unsigned int irq)
65 65
66static struct irq_chip mappi_irq_type = 66static struct irq_chip mappi_irq_type =
67{ 67{
68 .typename = "MAPPI-IRQ", 68 .name = "MAPPI-IRQ",
69 .startup = startup_mappi_irq, 69 .startup = startup_mappi_irq,
70 .shutdown = shutdown_mappi_irq, 70 .shutdown = shutdown_mappi_irq,
71 .enable = enable_mappi_irq, 71 .enable = enable_mappi_irq,
diff --git a/arch/m32r/platforms/mappi2/setup.c b/arch/m32r/platforms/mappi2/setup.c
index 6a65eda0a056..c049376d0270 100644
--- a/arch/m32r/platforms/mappi2/setup.c
+++ b/arch/m32r/platforms/mappi2/setup.c
@@ -72,7 +72,7 @@ static void shutdown_mappi2_irq(unsigned int irq)
72 72
73static struct irq_chip mappi2_irq_type = 73static struct irq_chip mappi2_irq_type =
74{ 74{
75 .typename = "MAPPI2-IRQ", 75 .name = "MAPPI2-IRQ",
76 .startup = startup_mappi2_irq, 76 .startup = startup_mappi2_irq,
77 .shutdown = shutdown_mappi2_irq, 77 .shutdown = shutdown_mappi2_irq,
78 .enable = enable_mappi2_irq, 78 .enable = enable_mappi2_irq,
diff --git a/arch/m32r/platforms/mappi3/setup.c b/arch/m32r/platforms/mappi3/setup.c
index 9c337aeac94b..882de25c6e8c 100644
--- a/arch/m32r/platforms/mappi3/setup.c
+++ b/arch/m32r/platforms/mappi3/setup.c
@@ -72,7 +72,7 @@ static void shutdown_mappi3_irq(unsigned int irq)
72 72
73static struct irq_chip mappi3_irq_type = 73static struct irq_chip mappi3_irq_type =
74{ 74{
75 .typename = "MAPPI3-IRQ", 75 .name = "MAPPI3-IRQ",
76 .startup = startup_mappi3_irq, 76 .startup = startup_mappi3_irq,
77 .shutdown = shutdown_mappi3_irq, 77 .shutdown = shutdown_mappi3_irq,
78 .enable = enable_mappi3_irq, 78 .enable = enable_mappi3_irq,
diff --git a/arch/m32r/platforms/oaks32r/setup.c b/arch/m32r/platforms/oaks32r/setup.c
index ed865741c38d..d11d93bf74f5 100644
--- a/arch/m32r/platforms/oaks32r/setup.c
+++ b/arch/m32r/platforms/oaks32r/setup.c
@@ -63,7 +63,7 @@ static void shutdown_oaks32r_irq(unsigned int irq)
63 63
64static struct irq_chip oaks32r_irq_type = 64static struct irq_chip oaks32r_irq_type =
65{ 65{
66 .typename = "OAKS32R-IRQ", 66 .name = "OAKS32R-IRQ",
67 .startup = startup_oaks32r_irq, 67 .startup = startup_oaks32r_irq,
68 .shutdown = shutdown_oaks32r_irq, 68 .shutdown = shutdown_oaks32r_irq,
69 .enable = enable_oaks32r_irq, 69 .enable = enable_oaks32r_irq,
diff --git a/arch/m32r/platforms/opsput/setup.c b/arch/m32r/platforms/opsput/setup.c
index 80d680657019..5f3402a2fbaf 100644
--- a/arch/m32r/platforms/opsput/setup.c
+++ b/arch/m32r/platforms/opsput/setup.c
@@ -72,7 +72,7 @@ static void shutdown_opsput_irq(unsigned int irq)
72 72
73static struct irq_chip opsput_irq_type = 73static struct irq_chip opsput_irq_type =
74{ 74{
75 .typename = "OPSPUT-IRQ", 75 .name = "OPSPUT-IRQ",
76 .startup = startup_opsput_irq, 76 .startup = startup_opsput_irq,
77 .shutdown = shutdown_opsput_irq, 77 .shutdown = shutdown_opsput_irq,
78 .enable = enable_opsput_irq, 78 .enable = enable_opsput_irq,
@@ -149,7 +149,7 @@ static void shutdown_opsput_pld_irq(unsigned int irq)
149 149
150static struct irq_chip opsput_pld_irq_type = 150static struct irq_chip opsput_pld_irq_type =
151{ 151{
152 .typename = "OPSPUT-PLD-IRQ", 152 .name = "OPSPUT-PLD-IRQ",
153 .startup = startup_opsput_pld_irq, 153 .startup = startup_opsput_pld_irq,
154 .shutdown = shutdown_opsput_pld_irq, 154 .shutdown = shutdown_opsput_pld_irq,
155 .enable = enable_opsput_pld_irq, 155 .enable = enable_opsput_pld_irq,
@@ -218,7 +218,7 @@ static void shutdown_opsput_lanpld_irq(unsigned int irq)
218 218
219static struct irq_chip opsput_lanpld_irq_type = 219static struct irq_chip opsput_lanpld_irq_type =
220{ 220{
221 .typename = "OPSPUT-PLD-LAN-IRQ", 221 .name = "OPSPUT-PLD-LAN-IRQ",
222 .startup = startup_opsput_lanpld_irq, 222 .startup = startup_opsput_lanpld_irq,
223 .shutdown = shutdown_opsput_lanpld_irq, 223 .shutdown = shutdown_opsput_lanpld_irq,
224 .enable = enable_opsput_lanpld_irq, 224 .enable = enable_opsput_lanpld_irq,
diff --git a/arch/m32r/platforms/usrv/setup.c b/arch/m32r/platforms/usrv/setup.c
index 757302660af8..1beac7a51ed4 100644
--- a/arch/m32r/platforms/usrv/setup.c
+++ b/arch/m32r/platforms/usrv/setup.c
@@ -63,7 +63,7 @@ static void shutdown_mappi_irq(unsigned int irq)
63 63
64static struct irq_chip mappi_irq_type = 64static struct irq_chip mappi_irq_type =
65{ 65{
66 .typename = "M32700-IRQ", 66 .name = "M32700-IRQ",
67 .startup = startup_mappi_irq, 67 .startup = startup_mappi_irq,
68 .shutdown = shutdown_mappi_irq, 68 .shutdown = shutdown_mappi_irq,
69 .enable = enable_mappi_irq, 69 .enable = enable_mappi_irq,
@@ -136,7 +136,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq)
136 136
137static struct irq_chip m32700ut_pld_irq_type = 137static struct irq_chip m32700ut_pld_irq_type =
138{ 138{
139 .typename = "USRV-PLD-IRQ", 139 .name = "USRV-PLD-IRQ",
140 .startup = startup_m32700ut_pld_irq, 140 .startup = startup_m32700ut_pld_irq,
141 .shutdown = shutdown_m32700ut_pld_irq, 141 .shutdown = shutdown_m32700ut_pld_irq,
142 .enable = enable_m32700ut_pld_irq, 142 .enable = enable_m32700ut_pld_irq,
diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild
index e322d65f33a4..7dd65cfae837 100644
--- a/arch/mips/Kbuild
+++ b/arch/mips/Kbuild
@@ -7,6 +7,10 @@ subdir-ccflags-y := -Werror
7include arch/mips/Kbuild.platforms 7include arch/mips/Kbuild.platforms
8obj-y := $(platform-y) 8obj-y := $(platform-y)
9 9
10# make clean traverses $(obj-) without having included .config, so
11# everything ends up here
12obj- := $(platform-)
13
10# mips object files 14# mips object files
11# The object files are linked as core-y files would be linked 15# The object files are linked as core-y files would be linked
12 16
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5526faabfc21..4c9f402295dd 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -881,11 +881,15 @@ config NO_IOPORT
881config GENERIC_ISA_DMA 881config GENERIC_ISA_DMA
882 bool 882 bool
883 select ZONE_DMA if GENERIC_ISA_DMA_SUPPORT_BROKEN=n 883 select ZONE_DMA if GENERIC_ISA_DMA_SUPPORT_BROKEN=n
884 select ISA_DMA_API
884 885
885config GENERIC_ISA_DMA_SUPPORT_BROKEN 886config GENERIC_ISA_DMA_SUPPORT_BROKEN
886 bool 887 bool
887 select GENERIC_ISA_DMA 888 select GENERIC_ISA_DMA
888 889
890config ISA_DMA_API
891 bool
892
889config GENERIC_GPIO 893config GENERIC_GPIO
890 bool 894 bool
891 895
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 5fd7f7a58b7e..5042d51b0512 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -105,4 +105,4 @@ OBJCOPYFLAGS_vmlinuz.srec := $(OBJCOPYFLAGS) -S -O srec
105vmlinuz.srec: vmlinuz 105vmlinuz.srec: vmlinuz
106 $(call cmd,objcopy) 106 $(call cmd,objcopy)
107 107
108clean-files := $(objtree)/vmlinuz.* 108clean-files := $(objtree)/vmlinuz $(objtree)/vmlinuz.{32,ecoff,bin,srec}
diff --git a/arch/mips/dec/Platform b/arch/mips/dec/Platform
index 3adbcbd95db1..cf55a6f4e720 100644
--- a/arch/mips/dec/Platform
+++ b/arch/mips/dec/Platform
@@ -1,7 +1,7 @@
1# 1#
2# DECstation family 2# DECstation family
3# 3#
4platform-$(CONFIG_MACH_DECSTATION) = dec/ 4platform-$(CONFIG_MACH_DECSTATION) += dec/
5cflags-$(CONFIG_MACH_DECSTATION) += \ 5cflags-$(CONFIG_MACH_DECSTATION) += \
6 -I$(srctree)/arch/mips/include/asm/mach-dec 6 -I$(srctree)/arch/mips/include/asm/mach-dec
7libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/ 7libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/
diff --git a/arch/mips/include/asm/fcntl.h b/arch/mips/include/asm/fcntl.h
index e482fe90fe88..75eddedcfc3e 100644
--- a/arch/mips/include/asm/fcntl.h
+++ b/arch/mips/include/asm/fcntl.h
@@ -56,6 +56,7 @@
56 */ 56 */
57 57
58#ifdef CONFIG_32BIT 58#ifdef CONFIG_32BIT
59#include <linux/types.h>
59 60
60struct flock { 61struct flock {
61 short l_type; 62 short l_type;
diff --git a/arch/mips/include/asm/siginfo.h b/arch/mips/include/asm/siginfo.h
index 96e28f18dad1..1ca64b4d33d9 100644
--- a/arch/mips/include/asm/siginfo.h
+++ b/arch/mips/include/asm/siginfo.h
@@ -88,6 +88,7 @@ typedef struct siginfo {
88#ifdef __ARCH_SI_TRAPNO 88#ifdef __ARCH_SI_TRAPNO
89 int _trapno; /* TRAP # which caused the signal */ 89 int _trapno; /* TRAP # which caused the signal */
90#endif 90#endif
91 short _addr_lsb;
91 } _sigfault; 92 } _sigfault;
92 93
93 /* SIGPOLL, SIGXFSZ (To do ...) */ 94 /* SIGPOLL, SIGXFSZ (To do ...) */
diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform
index 6a97230e3d05..ba91be9c21ef 100644
--- a/arch/mips/jz4740/Platform
+++ b/arch/mips/jz4740/Platform
@@ -1,3 +1,3 @@
1core-$(CONFIG_MACH_JZ4740) += arch/mips/jz4740/ 1platform-$(CONFIG_MACH_JZ4740) += jz4740/
2cflags-$(CONFIG_MACH_JZ4740) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 2cflags-$(CONFIG_MACH_JZ4740) += -I$(srctree)/arch/mips/include/asm/mach-jz4740
3load-$(CONFIG_MACH_JZ4740) += 0xffffffff80010000 3load-$(CONFIG_MACH_JZ4740) += 0xffffffff80010000
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 0176ed015c89..32103cc2a257 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -40,7 +40,6 @@ int __compute_return_epc(struct pt_regs *regs)
40 return -EFAULT; 40 return -EFAULT;
41 } 41 }
42 42
43 regs->regs[0] = 0;
44 switch (insn.i_format.opcode) { 43 switch (insn.i_format.opcode) {
45 /* 44 /*
46 * jr and jalr are in r_format format. 45 * jr and jalr are in r_format format.
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index 2340f11dc29c..9a526ba6f257 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -103,7 +103,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
103 if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) 103 if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
104 goto out_unlock; 104 goto out_unlock;
105 105
106 retval = security_task_setscheduler(p, 0, NULL); 106 retval = security_task_setscheduler(p);
107 if (retval) 107 if (retval)
108 goto out_unlock; 108 goto out_unlock;
109 109
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index c51b95ff8644..c8777333e198 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -536,7 +536,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
536{ 536{
537 /* do the secure computing check first */ 537 /* do the secure computing check first */
538 if (!entryexit) 538 if (!entryexit)
539 secure_computing(regs->regs[0]); 539 secure_computing(regs->regs[2]);
540 540
541 if (unlikely(current->audit_context) && entryexit) 541 if (unlikely(current->audit_context) && entryexit)
542 audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), 542 audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]),
@@ -565,7 +565,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
565 565
566out: 566out:
567 if (unlikely(current->audit_context) && !entryexit) 567 if (unlikely(current->audit_context) && !entryexit)
568 audit_syscall_entry(audit_arch(), regs->regs[0], 568 audit_syscall_entry(audit_arch(), regs->regs[2],
569 regs->regs[4], regs->regs[5], 569 regs->regs[4], regs->regs[5],
570 regs->regs[6], regs->regs[7]); 570 regs->regs[6], regs->regs[7]);
571} 571}
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 584415eef8c9..fbaabad0e6e2 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -63,9 +63,9 @@ stack_done:
63 sw t0, PT_R7(sp) # set error flag 63 sw t0, PT_R7(sp) # set error flag
64 beqz t0, 1f 64 beqz t0, 1f
65 65
66 lw t1, PT_R2(sp) # syscall number
66 negu v0 # error 67 negu v0 # error
67 sw v0, PT_R0(sp) # set flag for syscall 68 sw t1, PT_R0(sp) # save it for syscall restarting
68 # restarting
691: sw v0, PT_R2(sp) # result 691: sw v0, PT_R2(sp) # result
70 70
71o32_syscall_exit: 71o32_syscall_exit:
@@ -104,9 +104,9 @@ syscall_trace_entry:
104 sw t0, PT_R7(sp) # set error flag 104 sw t0, PT_R7(sp) # set error flag
105 beqz t0, 1f 105 beqz t0, 1f
106 106
107 lw t1, PT_R2(sp) # syscall number
107 negu v0 # error 108 negu v0 # error
108 sw v0, PT_R0(sp) # set flag for syscall 109 sw t1, PT_R0(sp) # save it for syscall restarting
109 # restarting
1101: sw v0, PT_R2(sp) # result 1101: sw v0, PT_R2(sp) # result
111 111
112 j syscall_exit 112 j syscall_exit
@@ -169,8 +169,7 @@ stackargs:
169 * We probably should handle this case a bit more drastic. 169 * We probably should handle this case a bit more drastic.
170 */ 170 */
171bad_stack: 171bad_stack:
172 negu v0 # error 172 li v0, EFAULT
173 sw v0, PT_R0(sp)
174 sw v0, PT_R2(sp) 173 sw v0, PT_R2(sp)
175 li t0, 1 # set error flag 174 li t0, 1 # set error flag
176 sw t0, PT_R7(sp) 175 sw t0, PT_R7(sp)
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 5573f8e4e326..3f4179283207 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -66,9 +66,9 @@ NESTED(handle_sys64, PT_SIZE, sp)
66 sd t0, PT_R7(sp) # set error flag 66 sd t0, PT_R7(sp) # set error flag
67 beqz t0, 1f 67 beqz t0, 1f
68 68
69 ld t1, PT_R2(sp) # syscall number
69 dnegu v0 # error 70 dnegu v0 # error
70 sd v0, PT_R0(sp) # set flag for syscall 71 sd t1, PT_R0(sp) # save it for syscall restarting
71 # restarting
721: sd v0, PT_R2(sp) # result 721: sd v0, PT_R2(sp) # result
73 73
74n64_syscall_exit: 74n64_syscall_exit:
@@ -109,8 +109,9 @@ syscall_trace_entry:
109 sd t0, PT_R7(sp) # set error flag 109 sd t0, PT_R7(sp) # set error flag
110 beqz t0, 1f 110 beqz t0, 1f
111 111
112 ld t1, PT_R2(sp) # syscall number
112 dnegu v0 # error 113 dnegu v0 # error
113 sd v0, PT_R0(sp) # set flag for syscall restarting 114 sd t1, PT_R0(sp) # save it for syscall restarting
1141: sd v0, PT_R2(sp) # result 1151: sd v0, PT_R2(sp) # result
115 116
116 j syscall_exit 117 j syscall_exit
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 1e38ec97672e..f08ece6d8acc 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -65,8 +65,9 @@ NESTED(handle_sysn32, PT_SIZE, sp)
65 sd t0, PT_R7(sp) # set error flag 65 sd t0, PT_R7(sp) # set error flag
66 beqz t0, 1f 66 beqz t0, 1f
67 67
68 ld t1, PT_R2(sp) # syscall number
68 dnegu v0 # error 69 dnegu v0 # error
69 sd v0, PT_R0(sp) # set flag for syscall restarting 70 sd t1, PT_R0(sp) # save it for syscall restarting
701: sd v0, PT_R2(sp) # result 711: sd v0, PT_R2(sp) # result
71 72
72 local_irq_disable # make sure need_resched and 73 local_irq_disable # make sure need_resched and
@@ -106,8 +107,9 @@ n32_syscall_trace_entry:
106 sd t0, PT_R7(sp) # set error flag 107 sd t0, PT_R7(sp) # set error flag
107 beqz t0, 1f 108 beqz t0, 1f
108 109
110 ld t1, PT_R2(sp) # syscall number
109 dnegu v0 # error 111 dnegu v0 # error
110 sd v0, PT_R0(sp) # set flag for syscall restarting 112 sd t1, PT_R0(sp) # save it for syscall restarting
1111: sd v0, PT_R2(sp) # result 1131: sd v0, PT_R2(sp) # result
112 114
113 j syscall_exit 115 j syscall_exit
@@ -320,10 +322,10 @@ EXPORT(sysn32_call_table)
320 PTR sys_cacheflush 322 PTR sys_cacheflush
321 PTR sys_cachectl 323 PTR sys_cachectl
322 PTR sys_sysmips 324 PTR sys_sysmips
323 PTR sys_io_setup /* 6200 */ 325 PTR compat_sys_io_setup /* 6200 */
324 PTR sys_io_destroy 326 PTR sys_io_destroy
325 PTR sys_io_getevents 327 PTR compat_sys_io_getevents
326 PTR sys_io_submit 328 PTR compat_sys_io_submit
327 PTR sys_io_cancel 329 PTR sys_io_cancel
328 PTR sys_exit_group /* 6205 */ 330 PTR sys_exit_group /* 6205 */
329 PTR sys_lookup_dcookie 331 PTR sys_lookup_dcookie
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 171979fc98e5..78d768a3e19d 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -93,8 +93,9 @@ NESTED(handle_sys, PT_SIZE, sp)
93 sd t0, PT_R7(sp) # set error flag 93 sd t0, PT_R7(sp) # set error flag
94 beqz t0, 1f 94 beqz t0, 1f
95 95
96 ld t1, PT_R2(sp) # syscall number
96 dnegu v0 # error 97 dnegu v0 # error
97 sd v0, PT_R0(sp) # flag for syscall restarting 98 sd t1, PT_R0(sp) # save it for syscall restarting
981: sd v0, PT_R2(sp) # result 991: sd v0, PT_R2(sp) # result
99 100
100o32_syscall_exit: 101o32_syscall_exit:
@@ -142,8 +143,9 @@ trace_a_syscall:
142 sd t0, PT_R7(sp) # set error flag 143 sd t0, PT_R7(sp) # set error flag
143 beqz t0, 1f 144 beqz t0, 1f
144 145
146 ld t1, PT_R2(sp) # syscall number
145 dnegu v0 # error 147 dnegu v0 # error
146 sd v0, PT_R0(sp) # set flag for syscall restarting 148 sd t1, PT_R0(sp) # save it for syscall restarting
1471: sd v0, PT_R2(sp) # result 1491: sd v0, PT_R2(sp) # result
148 150
149 j syscall_exit 151 j syscall_exit
@@ -154,8 +156,7 @@ trace_a_syscall:
154 * The stackpointer for a call with more than 4 arguments is bad. 156 * The stackpointer for a call with more than 4 arguments is bad.
155 */ 157 */
156bad_stack: 158bad_stack:
157 dnegu v0 # error 159 li v0, EFAULT
158 sd v0, PT_R0(sp)
159 sd v0, PT_R2(sp) 160 sd v0, PT_R2(sp)
160 li t0, 1 # set error flag 161 li t0, 1 # set error flag
161 sd t0, PT_R7(sp) 162 sd t0, PT_R7(sp)
@@ -444,10 +445,10 @@ sys_call_table:
444 PTR compat_sys_futex 445 PTR compat_sys_futex
445 PTR compat_sys_sched_setaffinity 446 PTR compat_sys_sched_setaffinity
446 PTR compat_sys_sched_getaffinity /* 4240 */ 447 PTR compat_sys_sched_getaffinity /* 4240 */
447 PTR sys_io_setup 448 PTR compat_sys_io_setup
448 PTR sys_io_destroy 449 PTR sys_io_destroy
449 PTR sys_io_getevents 450 PTR compat_sys_io_getevents
450 PTR sys_io_submit 451 PTR compat_sys_io_submit
451 PTR sys_io_cancel /* 4245 */ 452 PTR sys_io_cancel /* 4245 */
452 PTR sys_exit_group 453 PTR sys_exit_group
453 PTR sys32_lookup_dcookie 454 PTR sys32_lookup_dcookie
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 2099d5a4c4b7..5922342bca39 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -390,7 +390,6 @@ asmlinkage void sys_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
390{ 390{
391 struct rt_sigframe __user *frame; 391 struct rt_sigframe __user *frame;
392 sigset_t set; 392 sigset_t set;
393 stack_t st;
394 int sig; 393 int sig;
395 394
396 frame = (struct rt_sigframe __user *) regs.regs[29]; 395 frame = (struct rt_sigframe __user *) regs.regs[29];
@@ -411,11 +410,9 @@ asmlinkage void sys_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
411 else if (sig) 410 else if (sig)
412 force_sig(sig, current); 411 force_sig(sig, current);
413 412
414 if (__copy_from_user(&st, &frame->rs_uc.uc_stack, sizeof(st)))
415 goto badframe;
416 /* It is more difficult to avoid calling this function than to 413 /* It is more difficult to avoid calling this function than to
417 call it and ignore errors. */ 414 call it and ignore errors. */
418 do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]); 415 do_sigaltstack(&frame->rs_uc.uc_stack, NULL, regs.regs[29]);
419 416
420 /* 417 /*
421 * Don't let your children do this ... 418 * Don't let your children do this ...
@@ -550,23 +547,26 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
550 struct mips_abi *abi = current->thread.abi; 547 struct mips_abi *abi = current->thread.abi;
551 void *vdso = current->mm->context.vdso; 548 void *vdso = current->mm->context.vdso;
552 549
553 switch(regs->regs[0]) { 550 if (regs->regs[0]) {
554 case ERESTART_RESTARTBLOCK: 551 switch(regs->regs[2]) {
555 case ERESTARTNOHAND: 552 case ERESTART_RESTARTBLOCK:
556 regs->regs[2] = EINTR; 553 case ERESTARTNOHAND:
557 break;
558 case ERESTARTSYS:
559 if (!(ka->sa.sa_flags & SA_RESTART)) {
560 regs->regs[2] = EINTR; 554 regs->regs[2] = EINTR;
561 break; 555 break;
556 case ERESTARTSYS:
557 if (!(ka->sa.sa_flags & SA_RESTART)) {
558 regs->regs[2] = EINTR;
559 break;
560 }
561 /* fallthrough */
562 case ERESTARTNOINTR:
563 regs->regs[7] = regs->regs[26];
564 regs->regs[2] = regs->regs[0];
565 regs->cp0_epc -= 4;
562 } 566 }
563 /* fallthrough */
564 case ERESTARTNOINTR: /* Userland will reload $v0. */
565 regs->regs[7] = regs->regs[26];
566 regs->cp0_epc -= 8;
567 }
568 567
569 regs->regs[0] = 0; /* Don't deal with this again. */ 568 regs->regs[0] = 0; /* Don't deal with this again. */
569 }
570 570
571 if (sig_uses_siginfo(ka)) 571 if (sig_uses_siginfo(ka))
572 ret = abi->setup_rt_frame(vdso + abi->rt_signal_return_offset, 572 ret = abi->setup_rt_frame(vdso + abi->rt_signal_return_offset,
@@ -575,6 +575,9 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
575 ret = abi->setup_frame(vdso + abi->signal_return_offset, 575 ret = abi->setup_frame(vdso + abi->signal_return_offset,
576 ka, regs, sig, oldset); 576 ka, regs, sig, oldset);
577 577
578 if (ret)
579 return ret;
580
578 spin_lock_irq(&current->sighand->siglock); 581 spin_lock_irq(&current->sighand->siglock);
579 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); 582 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
580 if (!(ka->sa.sa_flags & SA_NODEFER)) 583 if (!(ka->sa.sa_flags & SA_NODEFER))
@@ -622,17 +625,13 @@ static void do_signal(struct pt_regs *regs)
622 return; 625 return;
623 } 626 }
624 627
625 /*
626 * Who's code doesn't conform to the restartable syscall convention
627 * dies here!!! The li instruction, a single machine instruction,
628 * must directly be followed by the syscall instruction.
629 */
630 if (regs->regs[0]) { 628 if (regs->regs[0]) {
631 if (regs->regs[2] == ERESTARTNOHAND || 629 if (regs->regs[2] == ERESTARTNOHAND ||
632 regs->regs[2] == ERESTARTSYS || 630 regs->regs[2] == ERESTARTSYS ||
633 regs->regs[2] == ERESTARTNOINTR) { 631 regs->regs[2] == ERESTARTNOINTR) {
632 regs->regs[2] = regs->regs[0];
634 regs->regs[7] = regs->regs[26]; 633 regs->regs[7] = regs->regs[26];
635 regs->cp0_epc -= 8; 634 regs->cp0_epc -= 4;
636 } 635 }
637 if (regs->regs[2] == ERESTART_RESTARTBLOCK) { 636 if (regs->regs[2] == ERESTART_RESTARTBLOCK) {
638 regs->regs[2] = current->thread.abi->restart; 637 regs->regs[2] = current->thread.abi->restart;
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index 2c5df818c65a..ee24d814d5b9 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -109,6 +109,7 @@ asmlinkage int sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
109asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) 109asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
110{ 110{
111 struct rt_sigframe_n32 __user *frame; 111 struct rt_sigframe_n32 __user *frame;
112 mm_segment_t old_fs;
112 sigset_t set; 113 sigset_t set;
113 stack_t st; 114 stack_t st;
114 s32 sp; 115 s32 sp;
@@ -143,7 +144,11 @@ asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
143 144
144 /* It is more difficult to avoid calling this function than to 145 /* It is more difficult to avoid calling this function than to
145 call it and ignore errors. */ 146 call it and ignore errors. */
147 old_fs = get_fs();
148 set_fs(KERNEL_DS);
146 do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]); 149 do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]);
150 set_fs(old_fs);
151
147 152
148 /* 153 /*
149 * Don't let your children do this ... 154 * Don't let your children do this ...
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 69b039ca8d83..33d5a5ce4a29 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -109,8 +109,6 @@ static void emulate_load_store_insn(struct pt_regs *regs,
109 unsigned long value; 109 unsigned long value;
110 unsigned int res; 110 unsigned int res;
111 111
112 regs->regs[0] = 0;
113
114 /* 112 /*
115 * This load never faults. 113 * This load never faults.
116 */ 114 */
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 907417d187e1..79a04a9394d5 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,6 +16,7 @@ config PARISC
16 select RTC_DRV_GENERIC 16 select RTC_DRV_GENERIC
17 select INIT_ALL_POSSIBLE 17 select INIT_ALL_POSSIBLE
18 select BUG 18 select BUG
19 select HAVE_IRQ_WORK
19 select HAVE_PERF_EVENTS 20 select HAVE_PERF_EVENTS
20 select GENERIC_ATOMIC64 if !64BIT 21 select GENERIC_ATOMIC64 if !64BIT
21 help 22 help
diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h
index cc146427d8f9..1e0fd8ba6c03 100644
--- a/arch/parisc/include/asm/perf_event.h
+++ b/arch/parisc/include/asm/perf_event.h
@@ -1,7 +1,6 @@
1#ifndef __ASM_PARISC_PERF_EVENT_H 1#ifndef __ASM_PARISC_PERF_EVENT_H
2#define __ASM_PARISC_PERF_EVENT_H 2#define __ASM_PARISC_PERF_EVENT_H
3 3
4/* parisc only supports software events through this interface. */ 4/* Empty, just to avoid compiling error */
5static inline void set_perf_event_pending(void) { }
6 5
7#endif /* __ASM_PARISC_PERF_EVENT_H */ 6#endif /* __ASM_PARISC_PERF_EVENT_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 631e5a0fb6ab..4b1e521d966f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -138,6 +138,7 @@ config PPC
138 select HAVE_OPROFILE 138 select HAVE_OPROFILE
139 select HAVE_SYSCALL_WRAPPERS if PPC64 139 select HAVE_SYSCALL_WRAPPERS if PPC64
140 select GENERIC_ATOMIC64 if PPC32 140 select GENERIC_ATOMIC64 if PPC32
141 select HAVE_IRQ_WORK
141 select HAVE_PERF_EVENTS 142 select HAVE_PERF_EVENTS
142 select HAVE_REGS_AND_STACK_ACCESS_API 143 select HAVE_REGS_AND_STACK_ACCESS_API
143 select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 144 select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
diff --git a/arch/powerpc/boot/dts/mpc8536ds.dts b/arch/powerpc/boot/dts/mpc8536ds.dts
index 815cebb2e3e5..a75c10eed269 100644
--- a/arch/powerpc/boot/dts/mpc8536ds.dts
+++ b/arch/powerpc/boot/dts/mpc8536ds.dts
@@ -108,6 +108,58 @@
108 }; 108 };
109 }; 109 };
110 110
111 spi@7000 {
112 #address-cells = <1>;
113 #size-cells = <0>;
114 compatible = "fsl,mpc8536-espi";
115 reg = <0x7000 0x1000>;
116 interrupts = <59 0x2>;
117 interrupt-parent = <&mpic>;
118 fsl,espi-num-chipselects = <4>;
119
120 flash@0 {
121 #address-cells = <1>;
122 #size-cells = <1>;
123 compatible = "spansion,s25sl12801";
124 reg = <0>;
125 spi-max-frequency = <40000000>;
126 partition@u-boot {
127 label = "u-boot";
128 reg = <0x00000000 0x00100000>;
129 read-only;
130 };
131 partition@kernel {
132 label = "kernel";
133 reg = <0x00100000 0x00500000>;
134 read-only;
135 };
136 partition@dtb {
137 label = "dtb";
138 reg = <0x00600000 0x00100000>;
139 read-only;
140 };
141 partition@fs {
142 label = "file system";
143 reg = <0x00700000 0x00900000>;
144 };
145 };
146 flash@1 {
147 compatible = "spansion,s25sl12801";
148 reg = <1>;
149 spi-max-frequency = <40000000>;
150 };
151 flash@2 {
152 compatible = "spansion,s25sl12801";
153 reg = <2>;
154 spi-max-frequency = <40000000>;
155 };
156 flash@3 {
157 compatible = "spansion,s25sl12801";
158 reg = <3>;
159 spi-max-frequency = <40000000>;
160 };
161 };
162
111 dma@21300 { 163 dma@21300 {
112 #address-cells = <1>; 164 #address-cells = <1>;
113 #size-cells = <1>; 165 #size-cells = <1>;
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 2f0de24e3822..5b7fc29dd6cf 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -236,22 +236,19 @@
236 }; 236 };
237 237
238 spi@110000 { 238 spi@110000 {
239 cell-index = <0>;
240 #address-cells = <1>; 239 #address-cells = <1>;
241 #size-cells = <0>; 240 #size-cells = <0>;
242 compatible = "fsl,espi"; 241 compatible = "fsl,p4080-espi", "fsl,mpc8536-espi";
243 reg = <0x110000 0x1000>; 242 reg = <0x110000 0x1000>;
244 interrupts = <53 0x2>; 243 interrupts = <53 0x2>;
245 interrupt-parent = <&mpic>; 244 interrupt-parent = <&mpic>;
246 espi,num-ss-bits = <4>; 245 fsl,espi-num-chipselects = <4>;
247 mode = "cpu";
248 246
249 fsl_m25p80@0 { 247 flash@0 {
250 #address-cells = <1>; 248 #address-cells = <1>;
251 #size-cells = <1>; 249 #size-cells = <1>;
252 compatible = "fsl,espi-flash"; 250 compatible = "spansion,s25sl12801";
253 reg = <0>; 251 reg = <0>;
254 linux,modalias = "fsl_m25p80";
255 spi-max-frequency = <40000000>; /* input clock */ 252 spi-max-frequency = <40000000>; /* input clock */
256 partition@u-boot { 253 partition@u-boot {
257 label = "u-boot"; 254 label = "u-boot";
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1ff6662f7faf..9b287fdd8ea3 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -129,7 +129,7 @@ struct paca_struct {
129 u8 soft_enabled; /* irq soft-enable flag */ 129 u8 soft_enabled; /* irq soft-enable flag */
130 u8 hard_enabled; /* set if irqs are enabled in MSR */ 130 u8 hard_enabled; /* set if irqs are enabled in MSR */
131 u8 io_sync; /* writel() needs spin_unlock sync */ 131 u8 io_sync; /* writel() needs spin_unlock sync */
132 u8 perf_event_pending; /* PM interrupt while soft-disabled */ 132 u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
133 133
134 /* Stuff for accurate time accounting */ 134 /* Stuff for accurate time accounting */
135 u64 user_time; /* accumulated usermode TB ticks */ 135 u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 6c294acac848..9c3d160670b4 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -542,10 +542,6 @@ extern void reloc_got2(unsigned long);
542 542
543#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) 543#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
544 544
545#ifdef CONFIG_VIRT_CPU_ACCOUNTING
546extern void account_system_vtime(struct task_struct *);
547#endif
548
549extern struct dentry *powerpc_debugfs_root; 545extern struct dentry *powerpc_debugfs_root;
550 546
551#endif /* __KERNEL__ */ 547#endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 95ad9dad298e..d05ae4204bbf 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -23,18 +23,6 @@
23#include "ppc32.h" 23#include "ppc32.h"
24#endif 24#endif
25 25
26/*
27 * Store another value in a callchain_entry.
28 */
29static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
30{
31 unsigned int nr = entry->nr;
32
33 if (nr < PERF_MAX_STACK_DEPTH) {
34 entry->ip[nr] = ip;
35 entry->nr = nr + 1;
36 }
37}
38 26
39/* 27/*
40 * Is sp valid as the address of the next kernel stack frame after prev_sp? 28 * Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
58 return 0; 46 return 0;
59} 47}
60 48
61static void perf_callchain_kernel(struct pt_regs *regs, 49void
62 struct perf_callchain_entry *entry) 50perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
63{ 51{
64 unsigned long sp, next_sp; 52 unsigned long sp, next_sp;
65 unsigned long next_ip; 53 unsigned long next_ip;
@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
69 57
70 lr = regs->link; 58 lr = regs->link;
71 sp = regs->gpr[1]; 59 sp = regs->gpr[1];
72 callchain_store(entry, PERF_CONTEXT_KERNEL); 60 perf_callchain_store(entry, regs->nip);
73 callchain_store(entry, regs->nip);
74 61
75 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) 62 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
76 return; 63 return;
@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
89 next_ip = regs->nip; 76 next_ip = regs->nip;
90 lr = regs->link; 77 lr = regs->link;
91 level = 0; 78 level = 0;
92 callchain_store(entry, PERF_CONTEXT_KERNEL); 79 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
93 80
94 } else { 81 } else {
95 if (level == 0) 82 if (level == 0)
@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
111 ++level; 98 ++level;
112 } 99 }
113 100
114 callchain_store(entry, next_ip); 101 perf_callchain_store(entry, next_ip);
115 if (!valid_next_sp(next_sp, sp)) 102 if (!valid_next_sp(next_sp, sp))
116 return; 103 return;
117 sp = next_sp; 104 sp = next_sp;
@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
233 puc == (unsigned long) &sf->uc; 220 puc == (unsigned long) &sf->uc;
234} 221}
235 222
236static void perf_callchain_user_64(struct pt_regs *regs, 223static void perf_callchain_user_64(struct perf_callchain_entry *entry,
237 struct perf_callchain_entry *entry) 224 struct pt_regs *regs)
238{ 225{
239 unsigned long sp, next_sp; 226 unsigned long sp, next_sp;
240 unsigned long next_ip; 227 unsigned long next_ip;
@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
246 next_ip = regs->nip; 233 next_ip = regs->nip;
247 lr = regs->link; 234 lr = regs->link;
248 sp = regs->gpr[1]; 235 sp = regs->gpr[1];
249 callchain_store(entry, PERF_CONTEXT_USER); 236 perf_callchain_store(entry, next_ip);
250 callchain_store(entry, next_ip);
251 237
252 for (;;) { 238 for (;;) {
253 fp = (unsigned long __user *) sp; 239 fp = (unsigned long __user *) sp;
@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
276 read_user_stack_64(&uregs[PT_R1], &sp)) 262 read_user_stack_64(&uregs[PT_R1], &sp))
277 return; 263 return;
278 level = 0; 264 level = 0;
279 callchain_store(entry, PERF_CONTEXT_USER); 265 perf_callchain_store(entry, PERF_CONTEXT_USER);
280 callchain_store(entry, next_ip); 266 perf_callchain_store(entry, next_ip);
281 continue; 267 continue;
282 } 268 }
283 269
284 if (level == 0) 270 if (level == 0)
285 next_ip = lr; 271 next_ip = lr;
286 callchain_store(entry, next_ip); 272 perf_callchain_store(entry, next_ip);
287 ++level; 273 ++level;
288 sp = next_sp; 274 sp = next_sp;
289 } 275 }
@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
315 return __get_user_inatomic(*ret, ptr); 301 return __get_user_inatomic(*ret, ptr);
316} 302}
317 303
318static inline void perf_callchain_user_64(struct pt_regs *regs, 304static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
319 struct perf_callchain_entry *entry) 305 struct pt_regs *regs)
320{ 306{
321} 307}
322 308
@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
435 return mctx->mc_gregs; 421 return mctx->mc_gregs;
436} 422}
437 423
438static void perf_callchain_user_32(struct pt_regs *regs, 424static void perf_callchain_user_32(struct perf_callchain_entry *entry,
439 struct perf_callchain_entry *entry) 425 struct pt_regs *regs)
440{ 426{
441 unsigned int sp, next_sp; 427 unsigned int sp, next_sp;
442 unsigned int next_ip; 428 unsigned int next_ip;
@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
447 next_ip = regs->nip; 433 next_ip = regs->nip;
448 lr = regs->link; 434 lr = regs->link;
449 sp = regs->gpr[1]; 435 sp = regs->gpr[1];
450 callchain_store(entry, PERF_CONTEXT_USER); 436 perf_callchain_store(entry, next_ip);
451 callchain_store(entry, next_ip);
452 437
453 while (entry->nr < PERF_MAX_STACK_DEPTH) { 438 while (entry->nr < PERF_MAX_STACK_DEPTH) {
454 fp = (unsigned int __user *) (unsigned long) sp; 439 fp = (unsigned int __user *) (unsigned long) sp;
@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
470 read_user_stack_32(&uregs[PT_R1], &sp)) 455 read_user_stack_32(&uregs[PT_R1], &sp))
471 return; 456 return;
472 level = 0; 457 level = 0;
473 callchain_store(entry, PERF_CONTEXT_USER); 458 perf_callchain_store(entry, PERF_CONTEXT_USER);
474 callchain_store(entry, next_ip); 459 perf_callchain_store(entry, next_ip);
475 continue; 460 continue;
476 } 461 }
477 462
478 if (level == 0) 463 if (level == 0)
479 next_ip = lr; 464 next_ip = lr;
480 callchain_store(entry, next_ip); 465 perf_callchain_store(entry, next_ip);
481 ++level; 466 ++level;
482 sp = next_sp; 467 sp = next_sp;
483 } 468 }
484} 469}
485 470
486/* 471void
487 * Since we can't get PMU interrupts inside a PMU interrupt handler, 472perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
488 * we don't need separate irq and nmi entries here.
489 */
490static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
491
492struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
493{ 473{
494 struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); 474 if (current_is_64bit())
495 475 perf_callchain_user_64(entry, regs);
496 entry->nr = 0; 476 else
497 477 perf_callchain_user_32(entry, regs);
498 if (!user_mode(regs)) {
499 perf_callchain_kernel(regs, entry);
500 if (current->mm)
501 regs = task_pt_regs(current);
502 else
503 regs = NULL;
504 }
505
506 if (regs) {
507 if (current_is_64bit())
508 perf_callchain_user_64(regs, entry);
509 else
510 perf_callchain_user_32(regs, entry);
511 }
512
513 return entry;
514} 478}
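The perf_callchain.c rework above follows the core perf change in which the generic code owns the per-cpu callchain buffer and calls perf_callchain_kernel()/perf_callchain_user() itself; the local callchain_store() disappears because the core now provides an equivalent perf_callchain_store(), and the PERF_CONTEXT_KERNEL/PERF_CONTEXT_USER markers at the top of each chain are likewise emitted by the core, which is why the first store now records regs->nip directly. For reference, a sketch of what that helper does, mirroring the removed local version:

	/* roughly the behaviour of perf_callchain_store() in this kernel generation */
	static inline void demo_callchain_store(struct perf_callchain_entry *entry, u64 ip)
	{
		if (entry->nr < PERF_MAX_STACK_DEPTH)
			entry->ip[entry->nr++] = ip;
	}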
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index d301a30445e0..3129c855933c 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
402{ 402{
403 s64 val, delta, prev; 403 s64 val, delta, prev;
404 404
405 if (event->hw.state & PERF_HES_STOPPED)
406 return;
407
405 if (!event->hw.idx) 408 if (!event->hw.idx)
406 return; 409 return;
407 /* 410 /*
@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
517 * Disable all events to prevent PMU interrupts and to allow 520 * Disable all events to prevent PMU interrupts and to allow
518 * events to be added or removed. 521 * events to be added or removed.
519 */ 522 */
520void hw_perf_disable(void) 523static void power_pmu_disable(struct pmu *pmu)
521{ 524{
522 struct cpu_hw_events *cpuhw; 525 struct cpu_hw_events *cpuhw;
523 unsigned long flags; 526 unsigned long flags;
@@ -565,7 +568,7 @@ void hw_perf_disable(void)
565 * If we were previously disabled and events were added, then 568 * If we were previously disabled and events were added, then
566 * put the new config on the PMU. 569 * put the new config on the PMU.
567 */ 570 */
568void hw_perf_enable(void) 571static void power_pmu_enable(struct pmu *pmu)
569{ 572{
570 struct perf_event *event; 573 struct perf_event *event;
571 struct cpu_hw_events *cpuhw; 574 struct cpu_hw_events *cpuhw;
@@ -672,6 +675,8 @@ void hw_perf_enable(void)
672 } 675 }
673 local64_set(&event->hw.prev_count, val); 676 local64_set(&event->hw.prev_count, val);
674 event->hw.idx = idx; 677 event->hw.idx = idx;
678 if (event->hw.state & PERF_HES_STOPPED)
679 val = 0;
675 write_pmc(idx, val); 680 write_pmc(idx, val);
676 perf_event_update_userpage(event); 681 perf_event_update_userpage(event);
677 } 682 }
@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
727 * re-enable the PMU in order to get hw_perf_enable to do the 732 * re-enable the PMU in order to get hw_perf_enable to do the
728 * actual work of reconfiguring the PMU. 733 * actual work of reconfiguring the PMU.
729 */ 734 */
730static int power_pmu_enable(struct perf_event *event) 735static int power_pmu_add(struct perf_event *event, int ef_flags)
731{ 736{
732 struct cpu_hw_events *cpuhw; 737 struct cpu_hw_events *cpuhw;
733 unsigned long flags; 738 unsigned long flags;
@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event)
735 int ret = -EAGAIN; 740 int ret = -EAGAIN;
736 741
737 local_irq_save(flags); 742 local_irq_save(flags);
738 perf_disable(); 743 perf_pmu_disable(event->pmu);
739 744
740 /* 745 /*
741 * Add the event to the list (if there is room) 746 * Add the event to the list (if there is room)
@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
749 cpuhw->events[n0] = event->hw.config; 754 cpuhw->events[n0] = event->hw.config;
750 cpuhw->flags[n0] = event->hw.event_base; 755 cpuhw->flags[n0] = event->hw.event_base;
751 756
757 if (!(ef_flags & PERF_EF_START))
758 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
759
752 /* 760 /*
753 * If group events scheduling transaction was started, 761 * If group events scheduling transaction was started,
754 * skip the schedulability test here, it will be peformed 762 * skip the schedulability test here, it will be peformed
@@ -769,7 +777,7 @@ nocheck:
769 777
770 ret = 0; 778 ret = 0;
771 out: 779 out:
772 perf_enable(); 780 perf_pmu_enable(event->pmu);
773 local_irq_restore(flags); 781 local_irq_restore(flags);
774 return ret; 782 return ret;
775} 783}
@@ -777,14 +785,14 @@ nocheck:
777/* 785/*
778 * Remove a event from the PMU. 786 * Remove a event from the PMU.
779 */ 787 */
780static void power_pmu_disable(struct perf_event *event) 788static void power_pmu_del(struct perf_event *event, int ef_flags)
781{ 789{
782 struct cpu_hw_events *cpuhw; 790 struct cpu_hw_events *cpuhw;
783 long i; 791 long i;
784 unsigned long flags; 792 unsigned long flags;
785 793
786 local_irq_save(flags); 794 local_irq_save(flags);
787 perf_disable(); 795 perf_pmu_disable(event->pmu);
788 796
789 power_pmu_read(event); 797 power_pmu_read(event);
790 798
@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event)
821 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); 829 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
822 } 830 }
823 831
824 perf_enable(); 832 perf_pmu_enable(event->pmu);
825 local_irq_restore(flags); 833 local_irq_restore(flags);
826} 834}
827 835
828/* 836/*
829 * Re-enable interrupts on a event after they were throttled 837 * POWER-PMU does not support disabling individual counters, hence
830 * because they were coming too fast. 838 * program their cycle counter to their max value and ignore the interrupts.
831 */ 839 */
832static void power_pmu_unthrottle(struct perf_event *event) 840
841static void power_pmu_start(struct perf_event *event, int ef_flags)
842{
843 unsigned long flags;
844 s64 left;
845
846 if (!event->hw.idx || !event->hw.sample_period)
847 return;
848
849 if (!(event->hw.state & PERF_HES_STOPPED))
850 return;
851
852 if (ef_flags & PERF_EF_RELOAD)
853 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
854
855 local_irq_save(flags);
856 perf_pmu_disable(event->pmu);
857
858 event->hw.state = 0;
859 left = local64_read(&event->hw.period_left);
860 write_pmc(event->hw.idx, left);
861
862 perf_event_update_userpage(event);
863 perf_pmu_enable(event->pmu);
864 local_irq_restore(flags);
865}
866
867static void power_pmu_stop(struct perf_event *event, int ef_flags)
833{ 868{
834 s64 val, left;
835 unsigned long flags; 869 unsigned long flags;
836 870
837 if (!event->hw.idx || !event->hw.sample_period) 871 if (!event->hw.idx || !event->hw.sample_period)
838 return; 872 return;
873
874 if (event->hw.state & PERF_HES_STOPPED)
875 return;
876
839 local_irq_save(flags); 877 local_irq_save(flags);
840 perf_disable(); 878 perf_pmu_disable(event->pmu);
879
841 power_pmu_read(event); 880 power_pmu_read(event);
842 left = event->hw.sample_period; 881 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
843 event->hw.last_period = left; 882 write_pmc(event->hw.idx, 0);
844 val = 0; 883
845 if (left < 0x80000000L)
846 val = 0x80000000L - left;
847 write_pmc(event->hw.idx, val);
848 local64_set(&event->hw.prev_count, val);
849 local64_set(&event->hw.period_left, left);
850 perf_event_update_userpage(event); 884 perf_event_update_userpage(event);
851 perf_enable(); 885 perf_pmu_enable(event->pmu);
852 local_irq_restore(flags); 886 local_irq_restore(flags);
853} 887}
854 888
@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event)
857 * Set the flag to make pmu::enable() not perform the 891 * Set the flag to make pmu::enable() not perform the
858 * schedulability test, it will be performed at commit time 892 * schedulability test, it will be performed at commit time
859 */ 893 */
860void power_pmu_start_txn(const struct pmu *pmu) 894void power_pmu_start_txn(struct pmu *pmu)
861{ 895{
862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 896 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
863 897
898 perf_pmu_disable(pmu);
864 cpuhw->group_flag |= PERF_EVENT_TXN; 899 cpuhw->group_flag |= PERF_EVENT_TXN;
865 cpuhw->n_txn_start = cpuhw->n_events; 900 cpuhw->n_txn_start = cpuhw->n_events;
866} 901}
@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu)
870 * Clear the flag and pmu::enable() will perform the 905 * Clear the flag and pmu::enable() will perform the
871 * schedulability test. 906 * schedulability test.
872 */ 907 */
873void power_pmu_cancel_txn(const struct pmu *pmu) 908void power_pmu_cancel_txn(struct pmu *pmu)
874{ 909{
875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 910 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
876 911
877 cpuhw->group_flag &= ~PERF_EVENT_TXN; 912 cpuhw->group_flag &= ~PERF_EVENT_TXN;
913 perf_pmu_enable(pmu);
878} 914}
879 915
880/* 916/*
@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
882 * Perform the group schedulability test as a whole 918 * Perform the group schedulability test as a whole
883 * Return 0 if success 919 * Return 0 if success
884 */ 920 */
885int power_pmu_commit_txn(const struct pmu *pmu) 921int power_pmu_commit_txn(struct pmu *pmu)
886{ 922{
887 struct cpu_hw_events *cpuhw; 923 struct cpu_hw_events *cpuhw;
888 long i, n; 924 long i, n;
@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu)
901 cpuhw->event[i]->hw.config = cpuhw->events[i]; 937 cpuhw->event[i]->hw.config = cpuhw->events[i];
902 938
903 cpuhw->group_flag &= ~PERF_EVENT_TXN; 939 cpuhw->group_flag &= ~PERF_EVENT_TXN;
940 perf_pmu_enable(pmu);
904 return 0; 941 return 0;
905} 942}
906 943
907struct pmu power_pmu = {
908 .enable = power_pmu_enable,
909 .disable = power_pmu_disable,
910 .read = power_pmu_read,
911 .unthrottle = power_pmu_unthrottle,
912 .start_txn = power_pmu_start_txn,
913 .cancel_txn = power_pmu_cancel_txn,
914 .commit_txn = power_pmu_commit_txn,
915};
916
917/* 944/*
918 * Return 1 if we might be able to put event on a limited PMC, 945 * Return 1 if we might be able to put event on a limited PMC,
919 * or 0 if not. 946 * or 0 if not.
@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
1014 return 0; 1041 return 0;
1015} 1042}
1016 1043
1017const struct pmu *hw_perf_event_init(struct perf_event *event) 1044static int power_pmu_event_init(struct perf_event *event)
1018{ 1045{
1019 u64 ev; 1046 u64 ev;
1020 unsigned long flags; 1047 unsigned long flags;
@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1026 struct cpu_hw_events *cpuhw; 1053 struct cpu_hw_events *cpuhw;
1027 1054
1028 if (!ppmu) 1055 if (!ppmu)
1029 return ERR_PTR(-ENXIO); 1056 return -ENOENT;
1057
1030 switch (event->attr.type) { 1058 switch (event->attr.type) {
1031 case PERF_TYPE_HARDWARE: 1059 case PERF_TYPE_HARDWARE:
1032 ev = event->attr.config; 1060 ev = event->attr.config;
1033 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 1061 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
1034 return ERR_PTR(-EOPNOTSUPP); 1062 return -EOPNOTSUPP;
1035 ev = ppmu->generic_events[ev]; 1063 ev = ppmu->generic_events[ev];
1036 break; 1064 break;
1037 case PERF_TYPE_HW_CACHE: 1065 case PERF_TYPE_HW_CACHE:
1038 err = hw_perf_cache_event(event->attr.config, &ev); 1066 err = hw_perf_cache_event(event->attr.config, &ev);
1039 if (err) 1067 if (err)
1040 return ERR_PTR(err); 1068 return err;
1041 break; 1069 break;
1042 case PERF_TYPE_RAW: 1070 case PERF_TYPE_RAW:
1043 ev = event->attr.config; 1071 ev = event->attr.config;
1044 break; 1072 break;
1045 default: 1073 default:
1046 return ERR_PTR(-EINVAL); 1074 return -ENOENT;
1047 } 1075 }
1076
1048 event->hw.config_base = ev; 1077 event->hw.config_base = ev;
1049 event->hw.idx = 0; 1078 event->hw.idx = 0;
1050 1079
@@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1063 * XXX we should check if the task is an idle task. 1092 * XXX we should check if the task is an idle task.
1064 */ 1093 */
1065 flags = 0; 1094 flags = 0;
1066 if (event->ctx->task) 1095 if (event->attach_state & PERF_ATTACH_TASK)
1067 flags |= PPMU_ONLY_COUNT_RUN; 1096 flags |= PPMU_ONLY_COUNT_RUN;
1068 1097
1069 /* 1098 /*
@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1081 */ 1110 */
1082 ev = normal_pmc_alternative(ev, flags); 1111 ev = normal_pmc_alternative(ev, flags);
1083 if (!ev) 1112 if (!ev)
1084 return ERR_PTR(-EINVAL); 1113 return -EINVAL;
1085 } 1114 }
1086 } 1115 }
1087 1116
@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1095 n = collect_events(event->group_leader, ppmu->n_counter - 1, 1124 n = collect_events(event->group_leader, ppmu->n_counter - 1,
1096 ctrs, events, cflags); 1125 ctrs, events, cflags);
1097 if (n < 0) 1126 if (n < 0)
1098 return ERR_PTR(-EINVAL); 1127 return -EINVAL;
1099 } 1128 }
1100 events[n] = ev; 1129 events[n] = ev;
1101 ctrs[n] = event; 1130 ctrs[n] = event;
1102 cflags[n] = flags; 1131 cflags[n] = flags;
1103 if (check_excludes(ctrs, cflags, n, 1)) 1132 if (check_excludes(ctrs, cflags, n, 1))
1104 return ERR_PTR(-EINVAL); 1133 return -EINVAL;
1105 1134
1106 cpuhw = &get_cpu_var(cpu_hw_events); 1135 cpuhw = &get_cpu_var(cpu_hw_events);
1107 err = power_check_constraints(cpuhw, events, cflags, n + 1); 1136 err = power_check_constraints(cpuhw, events, cflags, n + 1);
1108 put_cpu_var(cpu_hw_events); 1137 put_cpu_var(cpu_hw_events);
1109 if (err) 1138 if (err)
1110 return ERR_PTR(-EINVAL); 1139 return -EINVAL;
1111 1140
1112 event->hw.config = events[n]; 1141 event->hw.config = events[n];
1113 event->hw.event_base = cflags[n]; 1142 event->hw.event_base = cflags[n];
@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1132 } 1161 }
1133 event->destroy = hw_perf_event_destroy; 1162 event->destroy = hw_perf_event_destroy;
1134 1163
1135 if (err) 1164 return err;
1136 return ERR_PTR(err);
1137 return &power_pmu;
1138} 1165}
1139 1166
1167struct pmu power_pmu = {
1168 .pmu_enable = power_pmu_enable,
1169 .pmu_disable = power_pmu_disable,
1170 .event_init = power_pmu_event_init,
1171 .add = power_pmu_add,
1172 .del = power_pmu_del,
1173 .start = power_pmu_start,
1174 .stop = power_pmu_stop,
1175 .read = power_pmu_read,
1176 .start_txn = power_pmu_start_txn,
1177 .cancel_txn = power_pmu_cancel_txn,
1178 .commit_txn = power_pmu_commit_txn,
1179};
1180
1140/* 1181/*
1141 * A counter has overflowed; update its count and record 1182 * A counter has overflowed; update its count and record
1142 * things if requested. Note that interrupts are hard-disabled 1183 * things if requested. Note that interrupts are hard-disabled
@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1149 s64 prev, delta, left; 1190 s64 prev, delta, left;
1150 int record = 0; 1191 int record = 0;
1151 1192
1193 if (event->hw.state & PERF_HES_STOPPED) {
1194 write_pmc(event->hw.idx, 0);
1195 return;
1196 }
1197
1152 /* we don't have to worry about interrupts here */ 1198 /* we don't have to worry about interrupts here */
1153 prev = local64_read(&event->hw.prev_count); 1199 prev = local64_read(&event->hw.prev_count);
1154 delta = (val - prev) & 0xfffffffful; 1200 delta = (val - prev) & 0xfffffffful;
@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1171 val = 0x80000000LL - left; 1217 val = 0x80000000LL - left;
1172 } 1218 }
1173 1219
1220 write_pmc(event->hw.idx, val);
1221 local64_set(&event->hw.prev_count, val);
1222 local64_set(&event->hw.period_left, left);
1223 perf_event_update_userpage(event);
1224
1174 /* 1225 /*
1175 * Finally record data if requested. 1226 * Finally record data if requested.
1176 */ 1227 */
@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1183 if (event->attr.sample_type & PERF_SAMPLE_ADDR) 1234 if (event->attr.sample_type & PERF_SAMPLE_ADDR)
1184 perf_get_data_addr(regs, &data.addr); 1235 perf_get_data_addr(regs, &data.addr);
1185 1236
1186 if (perf_event_overflow(event, nmi, &data, regs)) { 1237 if (perf_event_overflow(event, nmi, &data, regs))
1187 /* 1238 power_pmu_stop(event, 0);
1188 * Interrupts are coming too fast - throttle them
1189 * by setting the event to 0, so it will be
1190 * at least 2^30 cycles until the next interrupt
1191 * (assuming each event counts at most 2 counts
1192 * per cycle).
1193 */
1194 val = 0;
1195 left = ~0ULL >> 1;
1196 }
1197 } 1239 }
1198
1199 write_pmc(event->hw.idx, val);
1200 local64_set(&event->hw.prev_count, val);
1201 local64_set(&event->hw.period_left, left);
1202 perf_event_update_userpage(event);
1203} 1240}
1204 1241
1205/* 1242/*
@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
1342 freeze_events_kernel = MMCR0_FCHV; 1379 freeze_events_kernel = MMCR0_FCHV;
1343#endif /* CONFIG_PPC64 */ 1380#endif /* CONFIG_PPC64 */
1344 1381
1382 perf_pmu_register(&power_pmu);
1345 perf_cpu_notifier(power_pmu_notifier); 1383 perf_cpu_notifier(power_pmu_notifier);
1346 1384
1347 return 0; 1385 return 0;
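
In the converted power_pmu_event_init() above, "not my kind of event" now returns -ENOENT instead of an ERR_PTR, which lets the core try the next registered PMU, while hard failures such as -EINVAL or -EOPNOTSUPP stop the search. A hedged sketch of that probing loop in plain C; the array and function names here are invented for illustration, the real loop lives in the core perf code.

#include <stdio.h>
#include <errno.h>

/* stand-ins for a few registered PMUs; -ENOENT means "not my event type" */
static int cpu_pmu_init(int type) { return type == 0 ? 0 : -ENOENT; }
static int sw_pmu_init(int type)  { return type == 1 ? 0 : -ENOENT; }

static int (*pmus[])(int) = { cpu_pmu_init, sw_pmu_init };

static int event_init(int type)
{
	int i, err = -ENOENT;

	for (i = 0; i < 2; i++) {
		err = pmus[i](type);
		if (err != -ENOENT)     /* either success or a hard error */
			return err;     /* stop probing in both cases */
	}
	return err;                     /* nobody claimed the event */
}

int main(void)
{
	printf("type 1 -> %d\n", event_init(1));  /* claimed by sw_pmu_init */
	printf("type 7 -> %d\n", event_init(7));  /* -ENOENT: unsupported */
	return 0;
}
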
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 1ba45471ae43..7ecca59ddf77 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
156{ 156{
157 s64 val, delta, prev; 157 s64 val, delta, prev;
158 158
159 if (event->hw.state & PERF_HES_STOPPED)
160 return;
161
159 /* 162 /*
160 * Performance monitor interrupts come even when interrupts 163 * Performance monitor interrupts come even when interrupts
161 * are soft-disabled, as long as interrupts are hard-enabled. 164 * are soft-disabled, as long as interrupts are hard-enabled.
@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
177 * Disable all events to prevent PMU interrupts and to allow 180 * Disable all events to prevent PMU interrupts and to allow
178 * events to be added or removed. 181 * events to be added or removed.
179 */ 182 */
180void hw_perf_disable(void) 183static void fsl_emb_pmu_disable(struct pmu *pmu)
181{ 184{
182 struct cpu_hw_events *cpuhw; 185 struct cpu_hw_events *cpuhw;
183 unsigned long flags; 186 unsigned long flags;
@@ -216,7 +219,7 @@ void hw_perf_disable(void)
216 * If we were previously disabled and events were added, then 219 * If we were previously disabled and events were added, then
217 * put the new config on the PMU. 220 * put the new config on the PMU.
218 */ 221 */
219void hw_perf_enable(void) 222static void fsl_emb_pmu_enable(struct pmu *pmu)
220{ 223{
221 struct cpu_hw_events *cpuhw; 224 struct cpu_hw_events *cpuhw;
222 unsigned long flags; 225 unsigned long flags;
@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
262 return n; 265 return n;
263} 266}
264 267
265/* perf must be disabled, context locked on entry */ 268/* context locked on entry */
266static int fsl_emb_pmu_enable(struct perf_event *event) 269static int fsl_emb_pmu_add(struct perf_event *event, int flags)
267{ 270{
268 struct cpu_hw_events *cpuhw; 271 struct cpu_hw_events *cpuhw;
269 int ret = -EAGAIN; 272 int ret = -EAGAIN;
@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
271 u64 val; 274 u64 val;
272 int i; 275 int i;
273 276
277 perf_pmu_disable(event->pmu);
274 cpuhw = &get_cpu_var(cpu_hw_events); 278 cpuhw = &get_cpu_var(cpu_hw_events);
275 279
276 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) 280 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
301 val = 0x80000000L - left; 305 val = 0x80000000L - left;
302 } 306 }
303 local64_set(&event->hw.prev_count, val); 307 local64_set(&event->hw.prev_count, val);
308
309 if (!(flags & PERF_EF_START)) {
310 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
311 val = 0;
312 }
313
304 write_pmc(i, val); 314 write_pmc(i, val);
305 perf_event_update_userpage(event); 315 perf_event_update_userpage(event);
306 316
@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
310 ret = 0; 320 ret = 0;
311 out: 321 out:
312 put_cpu_var(cpu_hw_events); 322 put_cpu_var(cpu_hw_events);
323 perf_pmu_enable(event->pmu);
313 return ret; 324 return ret;
314} 325}
315 326
316/* perf must be disabled, context locked on entry */ 327/* context locked on entry */
317static void fsl_emb_pmu_disable(struct perf_event *event) 328static void fsl_emb_pmu_del(struct perf_event *event, int flags)
318{ 329{
319 struct cpu_hw_events *cpuhw; 330 struct cpu_hw_events *cpuhw;
320 int i = event->hw.idx; 331 int i = event->hw.idx;
321 332
333 perf_pmu_disable(event->pmu);
322 if (i < 0) 334 if (i < 0)
323 goto out; 335 goto out;
324 336
@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
346 cpuhw->n_events--; 358 cpuhw->n_events--;
347 359
348 out: 360 out:
361 perf_pmu_enable(event->pmu);
349 put_cpu_var(cpu_hw_events); 362 put_cpu_var(cpu_hw_events);
350} 363}
351 364
352/* 365static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
353 * Re-enable interrupts on a event after they were throttled
354 * because they were coming too fast.
355 *
356 * Context is locked on entry, but perf is not disabled.
357 */
358static void fsl_emb_pmu_unthrottle(struct perf_event *event)
359{ 366{
360 s64 val, left;
361 unsigned long flags; 367 unsigned long flags;
368 s64 left;
362 369
363 if (event->hw.idx < 0 || !event->hw.sample_period) 370 if (event->hw.idx < 0 || !event->hw.sample_period)
364 return; 371 return;
372
373 if (!(event->hw.state & PERF_HES_STOPPED))
374 return;
375
376 if (ef_flags & PERF_EF_RELOAD)
377 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
378
365 local_irq_save(flags); 379 local_irq_save(flags);
366 perf_disable(); 380 perf_pmu_disable(event->pmu);
367 fsl_emb_pmu_read(event); 381
368 left = event->hw.sample_period; 382 event->hw.state = 0;
369 event->hw.last_period = left; 383 left = local64_read(&event->hw.period_left);
370 val = 0; 384 write_pmc(event->hw.idx, left);
371 if (left < 0x80000000L) 385
372 val = 0x80000000L - left;
373 write_pmc(event->hw.idx, val);
374 local64_set(&event->hw.prev_count, val);
375 local64_set(&event->hw.period_left, left);
376 perf_event_update_userpage(event); 386 perf_event_update_userpage(event);
377 perf_enable(); 387 perf_pmu_enable(event->pmu);
378 local_irq_restore(flags); 388 local_irq_restore(flags);
379} 389}
380 390
381static struct pmu fsl_emb_pmu = { 391static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
382 .enable = fsl_emb_pmu_enable, 392{
383 .disable = fsl_emb_pmu_disable, 393 unsigned long flags;
384 .read = fsl_emb_pmu_read, 394
385 .unthrottle = fsl_emb_pmu_unthrottle, 395 if (event->hw.idx < 0 || !event->hw.sample_period)
386}; 396 return;
397
398 if (event->hw.state & PERF_HES_STOPPED)
399 return;
400
401 local_irq_save(flags);
402 perf_pmu_disable(event->pmu);
403
404 fsl_emb_pmu_read(event);
405 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
406 write_pmc(event->hw.idx, 0);
407
408 perf_event_update_userpage(event);
409 perf_pmu_enable(event->pmu);
410 local_irq_restore(flags);
411}
387 412
388/* 413/*
389 * Release the PMU if this is the last perf_event. 414 * Release the PMU if this is the last perf_event.
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
428 return 0; 453 return 0;
429} 454}
430 455
431const struct pmu *hw_perf_event_init(struct perf_event *event) 456static int fsl_emb_pmu_event_init(struct perf_event *event)
432{ 457{
433 u64 ev; 458 u64 ev;
434 struct perf_event *events[MAX_HWEVENTS]; 459 struct perf_event *events[MAX_HWEVENTS];
@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
441 case PERF_TYPE_HARDWARE: 466 case PERF_TYPE_HARDWARE:
442 ev = event->attr.config; 467 ev = event->attr.config;
443 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 468 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
444 return ERR_PTR(-EOPNOTSUPP); 469 return -EOPNOTSUPP;
445 ev = ppmu->generic_events[ev]; 470 ev = ppmu->generic_events[ev];
446 break; 471 break;
447 472
448 case PERF_TYPE_HW_CACHE: 473 case PERF_TYPE_HW_CACHE:
449 err = hw_perf_cache_event(event->attr.config, &ev); 474 err = hw_perf_cache_event(event->attr.config, &ev);
450 if (err) 475 if (err)
451 return ERR_PTR(err); 476 return err;
452 break; 477 break;
453 478
454 case PERF_TYPE_RAW: 479 case PERF_TYPE_RAW:
@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
456 break; 481 break;
457 482
458 default: 483 default:
459 return ERR_PTR(-EINVAL); 484 return -ENOENT;
460 } 485 }
461 486
462 event->hw.config = ppmu->xlate_event(ev); 487 event->hw.config = ppmu->xlate_event(ev);
463 if (!(event->hw.config & FSL_EMB_EVENT_VALID)) 488 if (!(event->hw.config & FSL_EMB_EVENT_VALID))
464 return ERR_PTR(-EINVAL); 489 return -EINVAL;
465 490
466 /* 491 /*
467 * If this is in a group, check if it can go on with all the 492 * If this is in a group, check if it can go on with all the
@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
473 n = collect_events(event->group_leader, 498 n = collect_events(event->group_leader,
474 ppmu->n_counter - 1, events); 499 ppmu->n_counter - 1, events);
475 if (n < 0) 500 if (n < 0)
476 return ERR_PTR(-EINVAL); 501 return -EINVAL;
477 } 502 }
478 503
479 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { 504 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
484 } 509 }
485 510
486 if (num_restricted >= ppmu->n_restricted) 511 if (num_restricted >= ppmu->n_restricted)
487 return ERR_PTR(-EINVAL); 512 return -EINVAL;
488 } 513 }
489 514
490 event->hw.idx = -1; 515 event->hw.idx = -1;
@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
497 if (event->attr.exclude_kernel) 522 if (event->attr.exclude_kernel)
498 event->hw.config_base |= PMLCA_FCS; 523 event->hw.config_base |= PMLCA_FCS;
499 if (event->attr.exclude_idle) 524 if (event->attr.exclude_idle)
500 return ERR_PTR(-ENOTSUPP); 525 return -ENOTSUPP;
501 526
502 event->hw.last_period = event->hw.sample_period; 527 event->hw.last_period = event->hw.sample_period;
503 local64_set(&event->hw.period_left, event->hw.last_period); 528 local64_set(&event->hw.period_left, event->hw.last_period);
@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
523 } 548 }
524 event->destroy = hw_perf_event_destroy; 549 event->destroy = hw_perf_event_destroy;
525 550
526 if (err) 551 return err;
527 return ERR_PTR(err);
528 return &fsl_emb_pmu;
529} 552}
530 553
554static struct pmu fsl_emb_pmu = {
555 .pmu_enable = fsl_emb_pmu_enable,
556 .pmu_disable = fsl_emb_pmu_disable,
557 .event_init = fsl_emb_pmu_event_init,
558 .add = fsl_emb_pmu_add,
559 .del = fsl_emb_pmu_del,
560 .start = fsl_emb_pmu_start,
561 .stop = fsl_emb_pmu_stop,
562 .read = fsl_emb_pmu_read,
563};
564
531/* 565/*
532 * A counter has overflowed; update its count and record 566 * A counter has overflowed; update its count and record
533 * things if requested. Note that interrupts are hard-disabled 567 * things if requested. Note that interrupts are hard-disabled
@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
540 s64 prev, delta, left; 574 s64 prev, delta, left;
541 int record = 0; 575 int record = 0;
542 576
577 if (event->hw.state & PERF_HES_STOPPED) {
578 write_pmc(event->hw.idx, 0);
579 return;
580 }
581
543 /* we don't have to worry about interrupts here */ 582 /* we don't have to worry about interrupts here */
544 prev = local64_read(&event->hw.prev_count); 583 prev = local64_read(&event->hw.prev_count);
545 delta = (val - prev) & 0xfffffffful; 584 delta = (val - prev) & 0xfffffffful;
@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
562 val = 0x80000000LL - left; 601 val = 0x80000000LL - left;
563 } 602 }
564 603
604 write_pmc(event->hw.idx, val);
605 local64_set(&event->hw.prev_count, val);
606 local64_set(&event->hw.period_left, left);
607 perf_event_update_userpage(event);
608
565 /* 609 /*
566 * Finally record data if requested. 610 * Finally record data if requested.
567 */ 611 */
@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
571 perf_sample_data_init(&data, 0); 615 perf_sample_data_init(&data, 0);
572 data.period = event->hw.last_period; 616 data.period = event->hw.last_period;
573 617
574 if (perf_event_overflow(event, nmi, &data, regs)) { 618 if (perf_event_overflow(event, nmi, &data, regs))
575 /* 619 fsl_emb_pmu_stop(event, 0);
576 * Interrupts are coming too fast - throttle them
577 * by setting the event to 0, so it will be
578 * at least 2^30 cycles until the next interrupt
579 * (assuming each event counts at most 2 counts
580 * per cycle).
581 */
582 val = 0;
583 left = ~0ULL >> 1;
584 }
585 } 620 }
586
587 write_pmc(event->hw.idx, val);
588 local64_set(&event->hw.prev_count, val);
589 local64_set(&event->hw.period_left, left);
590 perf_event_update_userpage(event);
591} 621}
592 622
593static void perf_event_interrupt(struct pt_regs *regs) 623static void perf_event_interrupt(struct pt_regs *regs)
@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
651 pr_info("%s performance monitor hardware support registered\n", 681 pr_info("%s performance monitor hardware support registered\n",
652 pmu->name); 682 pmu->name);
653 683
684 perf_pmu_register(&fsl_emb_pmu);
685
654 return 0; 686 return 0;
655} 687}
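
The fsl_emb conversion above replaces enable/disable/unthrottle with add/del/start/stop and tracks per-event state in hw.state via PERF_HES_STOPPED and PERF_HES_UPTODATE. A compact userspace sketch of that state machine; the flag names mirror the kernel's, but the event type and the faked counter reads are illustrative only.

#include <stdio.h>

#define HES_STOPPED  0x1   /* counter not currently counting */
#define HES_UPTODATE 0x2   /* software count already folded in */
#define EF_START     0x1
#define EF_RELOAD    0x2
#define EF_UPDATE    0x4

struct toy_event { unsigned int state; long long count, hw; };

static void ev_read(struct toy_event *e) { e->count += e->hw; e->hw = 0; }

static void ev_stop(struct toy_event *e, int flags)
{
	if (!(e->state & HES_STOPPED))
		e->state |= HES_STOPPED;          /* freeze the counter */
	if ((flags & EF_UPDATE) && !(e->state & HES_UPTODATE)) {
		ev_read(e);                       /* fold hardware value into count */
		e->state |= HES_UPTODATE;
	}
}

static void ev_start(struct toy_event *e, int flags)
{
	if ((flags & EF_RELOAD) && !(e->state & HES_UPTODATE))
		printf("warning: reload without an up-to-date count\n");
	e->state = 0;                             /* running again */
}

static int ev_add(struct toy_event *e, int flags)
{
	e->state = HES_STOPPED | HES_UPTODATE;    /* scheduled but not counting yet */
	if (flags & EF_START)
		ev_start(e, EF_RELOAD);
	return 0;
}

int main(void)
{
	struct toy_event e = { .hw = 5 };

	ev_add(&e, EF_START);
	e.hw += 37;                               /* pretend the counter ticked */
	ev_stop(&e, EF_UPDATE);                   /* del path: stop + update */
	printf("count=%lld state=%#x\n", e.count, e.state);
	return 0;
}
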
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 8533b3b83f5d..54888eb10c3b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -53,7 +53,7 @@
53#include <linux/posix-timers.h> 53#include <linux/posix-timers.h>
54#include <linux/irq.h> 54#include <linux/irq.h>
55#include <linux/delay.h> 55#include <linux/delay.h>
56#include <linux/perf_event.h> 56#include <linux/irq_work.h>
57#include <asm/trace.h> 57#include <asm/trace.h>
58 58
59#include <asm/io.h> 59#include <asm/io.h>
@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void)
493} 493}
494#endif /* CONFIG_PPC_ISERIES */ 494#endif /* CONFIG_PPC_ISERIES */
495 495
496#ifdef CONFIG_PERF_EVENTS 496#ifdef CONFIG_IRQ_WORK
497 497
498/* 498/*
499 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... 499 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
500 */ 500 */
501#ifdef CONFIG_PPC64 501#ifdef CONFIG_PPC64
502static inline unsigned long test_perf_event_pending(void) 502static inline unsigned long test_irq_work_pending(void)
503{ 503{
504 unsigned long x; 504 unsigned long x;
505 505
506 asm volatile("lbz %0,%1(13)" 506 asm volatile("lbz %0,%1(13)"
507 : "=r" (x) 507 : "=r" (x)
508 : "i" (offsetof(struct paca_struct, perf_event_pending))); 508 : "i" (offsetof(struct paca_struct, irq_work_pending)));
509 return x; 509 return x;
510} 510}
511 511
512static inline void set_perf_event_pending_flag(void) 512static inline void set_irq_work_pending_flag(void)
513{ 513{
514 asm volatile("stb %0,%1(13)" : : 514 asm volatile("stb %0,%1(13)" : :
515 "r" (1), 515 "r" (1),
516 "i" (offsetof(struct paca_struct, perf_event_pending))); 516 "i" (offsetof(struct paca_struct, irq_work_pending)));
517} 517}
518 518
519static inline void clear_perf_event_pending(void) 519static inline void clear_irq_work_pending(void)
520{ 520{
521 asm volatile("stb %0,%1(13)" : : 521 asm volatile("stb %0,%1(13)" : :
522 "r" (0), 522 "r" (0),
523 "i" (offsetof(struct paca_struct, perf_event_pending))); 523 "i" (offsetof(struct paca_struct, irq_work_pending)));
524} 524}
525 525
526#else /* 32-bit */ 526#else /* 32-bit */
527 527
528DEFINE_PER_CPU(u8, perf_event_pending); 528DEFINE_PER_CPU(u8, irq_work_pending);
529 529
530#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 530#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
531#define test_perf_event_pending() __get_cpu_var(perf_event_pending) 531#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
532#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 532#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
533 533
534#endif /* 32 vs 64 bit */ 534#endif /* 32 vs 64 bit */
535 535
536void set_perf_event_pending(void) 536void set_irq_work_pending(void)
537{ 537{
538 preempt_disable(); 538 preempt_disable();
539 set_perf_event_pending_flag(); 539 set_irq_work_pending_flag();
540 set_dec(1); 540 set_dec(1);
541 preempt_enable(); 541 preempt_enable();
542} 542}
543 543
544#else /* CONFIG_PERF_EVENTS */ 544#else /* CONFIG_IRQ_WORK */
545 545
546#define test_perf_event_pending() 0 546#define test_irq_work_pending() 0
547#define clear_perf_event_pending() 547#define clear_irq_work_pending()
548 548
549#endif /* CONFIG_PERF_EVENTS */ 549#endif /* CONFIG_IRQ_WORK */
550 550
551/* 551/*
552 * For iSeries shared processors, we have to let the hypervisor 552 * For iSeries shared processors, we have to let the hypervisor
@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs)
587 587
588 calculate_steal_time(); 588 calculate_steal_time();
589 589
590 if (test_perf_event_pending()) { 590 if (test_irq_work_pending()) {
591 clear_perf_event_pending(); 591 clear_irq_work_pending();
592 perf_event_do_pending(); 592 irq_work_run();
593 } 593 }
594 594
595#ifdef CONFIG_PPC_ISERIES 595#ifdef CONFIG_PPC_ISERIES
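
The time.c hunks above rename the perf_event_pending machinery to irq_work: a per-CPU flag is set, the decrementer is forced to fire almost immediately, and the timer interrupt clears the flag and runs the queued work. A small portable-C sketch of the same flag protocol; a plain variable stands in for the PACA byte / per-cpu u8, and an ordinary function stands in for the decrementer interrupt.

#include <stdio.h>

static volatile int irq_work_pending;   /* stands in for the PACA byte / per-cpu flag */

static void set_irq_work_pending(void)
{
	irq_work_pending = 1;
	/* set_dec(1) in the hunk: force the decrementer to fire right away */
}

static void run_irq_work(void)
{
	printf("deferred work runs in interrupt context\n");
}

static void timer_interrupt(void)
{
	if (irq_work_pending) {
		irq_work_pending = 0;    /* clear before running, as in the hunk */
		run_irq_work();
	}
	/* ... normal timer bookkeeping would follow ... */
}

int main(void)
{
	set_irq_work_pending();   /* e.g. a PMU overflow queues work */
	timer_interrupt();        /* the forced tick picks it up */
	timer_interrupt();        /* nothing pending the second time */
	return 0;
}
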
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 97085530aa63..e3e379c6caa7 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -310,9 +310,9 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
310} 310}
311 311
312static struct irq_chip msic_irq_chip = { 312static struct irq_chip msic_irq_chip = {
313 .mask = mask_msi_irq, 313 .irq_mask = mask_msi_irq,
314 .unmask = unmask_msi_irq, 314 .irq_unmask = unmask_msi_irq,
315 .shutdown = unmask_msi_irq, 315 .irq_shutdown = mask_msi_irq,
316 .name = "AXON-MSI", 316 .name = "AXON-MSI",
317}; 317};
318 318
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 93834b0d8272..67e2c4bdac8f 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -243,7 +243,7 @@ static unsigned int xics_startup(unsigned int virq)
243 * at that level, so we do it here by hand. 243 * at that level, so we do it here by hand.
244 */ 244 */
245 if (irq_to_desc(virq)->msi_desc) 245 if (irq_to_desc(virq)->msi_desc)
246 unmask_msi_irq(virq); 246 unmask_msi_irq(irq_get_irq_data(virq));
247 247
248 /* unmask it */ 248 /* unmask it */
249 xics_unmask_irq(virq); 249 xics_unmask_irq(virq);
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 87991d3abbab..bdbd896c89d8 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -51,8 +51,8 @@ static void fsl_msi_end_irq(unsigned int virq)
51} 51}
52 52
53static struct irq_chip fsl_msi_chip = { 53static struct irq_chip fsl_msi_chip = {
54 .mask = mask_msi_irq, 54 .irq_mask = mask_msi_irq,
55 .unmask = unmask_msi_irq, 55 .irq_unmask = unmask_msi_irq,
56 .ack = fsl_msi_end_irq, 56 .ack = fsl_msi_end_irq,
57 .name = "FSL-MSI", 57 .name = "FSL-MSI",
58}; 58};
diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c
index 3b6a9a43718f..320ad5a9a25d 100644
--- a/arch/powerpc/sysdev/mpic_pasemi_msi.c
+++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c
@@ -39,24 +39,24 @@
39static struct mpic *msi_mpic; 39static struct mpic *msi_mpic;
40 40
41 41
42static void mpic_pasemi_msi_mask_irq(unsigned int irq) 42static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
43{ 43{
44 pr_debug("mpic_pasemi_msi_mask_irq %d\n", irq); 44 pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq);
45 mask_msi_irq(irq); 45 mask_msi_irq(data);
46 mpic_mask_irq(irq); 46 mpic_mask_irq(data->irq);
47} 47}
48 48
49static void mpic_pasemi_msi_unmask_irq(unsigned int irq) 49static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
50{ 50{
51 pr_debug("mpic_pasemi_msi_unmask_irq %d\n", irq); 51 pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq);
52 mpic_unmask_irq(irq); 52 mpic_unmask_irq(data->irq);
53 unmask_msi_irq(irq); 53 unmask_msi_irq(data);
54} 54}
55 55
56static struct irq_chip mpic_pasemi_msi_chip = { 56static struct irq_chip mpic_pasemi_msi_chip = {
57 .shutdown = mpic_pasemi_msi_mask_irq, 57 .irq_shutdown = mpic_pasemi_msi_mask_irq,
58 .mask = mpic_pasemi_msi_mask_irq, 58 .irq_mask = mpic_pasemi_msi_mask_irq,
59 .unmask = mpic_pasemi_msi_unmask_irq, 59 .irq_unmask = mpic_pasemi_msi_unmask_irq,
60 .eoi = mpic_end_irq, 60 .eoi = mpic_end_irq,
61 .set_type = mpic_set_irq_type, 61 .set_type = mpic_set_irq_type,
62 .set_affinity = mpic_set_affinity, 62 .set_affinity = mpic_set_affinity,
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
index bcbfe79c704b..a2b028b4a202 100644
--- a/arch/powerpc/sysdev/mpic_u3msi.c
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -23,22 +23,22 @@
23/* A bit ugly, can we get this from the pci_dev somehow? */ 23/* A bit ugly, can we get this from the pci_dev somehow? */
24static struct mpic *msi_mpic; 24static struct mpic *msi_mpic;
25 25
26static void mpic_u3msi_mask_irq(unsigned int irq) 26static void mpic_u3msi_mask_irq(struct irq_data *data)
27{ 27{
28 mask_msi_irq(irq); 28 mask_msi_irq(data);
29 mpic_mask_irq(irq); 29 mpic_mask_irq(data->irq);
30} 30}
31 31
32static void mpic_u3msi_unmask_irq(unsigned int irq) 32static void mpic_u3msi_unmask_irq(struct irq_data *data)
33{ 33{
34 mpic_unmask_irq(irq); 34 mpic_unmask_irq(data->irq);
35 unmask_msi_irq(irq); 35 unmask_msi_irq(data);
36} 36}
37 37
38static struct irq_chip mpic_u3msi_chip = { 38static struct irq_chip mpic_u3msi_chip = {
39 .shutdown = mpic_u3msi_mask_irq, 39 .irq_shutdown = mpic_u3msi_mask_irq,
40 .mask = mpic_u3msi_mask_irq, 40 .irq_mask = mpic_u3msi_mask_irq,
41 .unmask = mpic_u3msi_unmask_irq, 41 .irq_unmask = mpic_u3msi_unmask_irq,
42 .eoi = mpic_end_irq, 42 .eoi = mpic_end_irq,
43 .set_type = mpic_set_irq_type, 43 .set_type = mpic_set_irq_type,
44 .set_affinity = mpic_set_affinity, 44 .set_affinity = mpic_set_affinity,
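
The MSI hunks above switch the irq_chip callbacks from taking a raw interrupt number to taking a struct irq_data pointer, and rename .mask/.unmask to .irq_mask/.irq_unmask accordingly. A stand-alone sketch of what that signature change looks like; the stub types below model only the irq number, not the real genirq structures.

#include <stdio.h>

struct irq_data { unsigned int irq; };   /* stub; the real one also carries chip/handler data */

/* old style: callbacks received the bare irq number */
static void old_mask(unsigned int irq)      { printf("mask irq %u\n", irq); }

/* new style: callbacks receive irq_data and dig the number out when needed */
static void new_mask(struct irq_data *data)   { printf("mask irq %u\n", data->irq); }
static void new_unmask(struct irq_data *data) { printf("unmask irq %u\n", data->irq); }

struct irq_chip {
	void (*irq_mask)(struct irq_data *data);
	void (*irq_unmask)(struct irq_data *data);
};

static struct irq_chip toy_chip = {
	.irq_mask   = new_mask,
	.irq_unmask = new_unmask,
};

int main(void)
{
	struct irq_data d = { .irq = 42 };

	old_mask(42);
	toy_chip.irq_mask(&d);      /* same operation, routed through irq_data */
	toy_chip.irq_unmask(&d);
	return 0;
}
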
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f0777a47e3a5..75976a141947 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -95,6 +95,7 @@ config S390
95 select HAVE_KVM if 64BIT 95 select HAVE_KVM if 64BIT
96 select HAVE_ARCH_TRACEHOOK 96 select HAVE_ARCH_TRACEHOOK
97 select INIT_ALL_POSSIBLE 97 select INIT_ALL_POSSIBLE
98 select HAVE_IRQ_WORK
98 select HAVE_PERF_EVENTS 99 select HAVE_PERF_EVENTS
99 select HAVE_KERNEL_GZIP 100 select HAVE_KERNEL_GZIP
100 select HAVE_KERNEL_BZIP2 101 select HAVE_KERNEL_BZIP2
@@ -198,6 +199,13 @@ config HOTPLUG_CPU
198 can be controlled through /sys/devices/system/cpu/cpu#. 199 can be controlled through /sys/devices/system/cpu/cpu#.
199 Say N if you want to disable CPU hotplug. 200 Say N if you want to disable CPU hotplug.
200 201
202config SCHED_BOOK
203 bool "Book scheduler support"
204 depends on SMP
205 help
206 Book scheduler support improves the CPU scheduler's decision making
207 when dealing with machines that have several books.
208
201config MATHEMU 209config MATHEMU
202 bool "IEEE FPU emulation" 210 bool "IEEE FPU emulation"
203 depends on MARCH_G5 211 depends on MARCH_G5
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 498bc3892385..881d94590aeb 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -12,10 +12,6 @@
12#ifndef __ASM_HARDIRQ_H 12#ifndef __ASM_HARDIRQ_H
13#define __ASM_HARDIRQ_H 13#define __ASM_HARDIRQ_H
14 14
15#include <linux/threads.h>
16#include <linux/sched.h>
17#include <linux/cache.h>
18#include <linux/interrupt.h>
19#include <asm/lowcore.h> 15#include <asm/lowcore.h>
20 16
21#define local_softirq_pending() (S390_lowcore.softirq_pending) 17#define local_softirq_pending() (S390_lowcore.softirq_pending)
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 3840cbe77637..a75f168d2718 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -4,7 +4,6 @@
4 * Copyright 2009 Martin Schwidefsky, IBM Corporation. 4 * Copyright 2009 Martin Schwidefsky, IBM Corporation.
5 */ 5 */
6 6
7static inline void set_perf_event_pending(void) {} 7/* Empty, just to avoid compiling error */
8static inline void clear_perf_event_pending(void) {}
9 8
10#define PERF_EVENT_INDEX_OFFSET 0 9#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 8e8a50eeed92..1f2ebc4afd82 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -97,7 +97,6 @@ static inline void restore_access_regs(unsigned int *acrs)
97 97
98extern void account_vtime(struct task_struct *, struct task_struct *); 98extern void account_vtime(struct task_struct *, struct task_struct *);
99extern void account_tick_vtime(struct task_struct *); 99extern void account_tick_vtime(struct task_struct *);
100extern void account_system_vtime(struct task_struct *);
101 100
102#ifdef CONFIG_PFAULT 101#ifdef CONFIG_PFAULT
103extern void pfault_irq_init(void); 102extern void pfault_irq_init(void);
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 831bd033ea77..051107a2c5e2 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -3,15 +3,32 @@
3 3
4#include <linux/cpumask.h> 4#include <linux/cpumask.h>
5 5
6#define mc_capable() (1)
7
8const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
9
10extern unsigned char cpu_core_id[NR_CPUS]; 6extern unsigned char cpu_core_id[NR_CPUS];
11extern cpumask_t cpu_core_map[NR_CPUS]; 7extern cpumask_t cpu_core_map[NR_CPUS];
12 8
9static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
10{
11 return &cpu_core_map[cpu];
12}
13
13#define topology_core_id(cpu) (cpu_core_id[cpu]) 14#define topology_core_id(cpu) (cpu_core_id[cpu])
14#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) 15#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
16#define mc_capable() (1)
17
18#ifdef CONFIG_SCHED_BOOK
19
20extern unsigned char cpu_book_id[NR_CPUS];
21extern cpumask_t cpu_book_map[NR_CPUS];
22
23static inline const struct cpumask *cpu_book_mask(unsigned int cpu)
24{
25 return &cpu_book_map[cpu];
26}
27
28#define topology_book_id(cpu) (cpu_book_id[cpu])
29#define topology_book_cpumask(cpu) (&cpu_book_map[cpu])
30
31#endif /* CONFIG_SCHED_BOOK */
15 32
16int topology_set_cpu_management(int fc); 33int topology_set_cpu_management(int fc);
17void topology_schedule_update(void); 34void topology_schedule_update(void);
@@ -30,6 +47,8 @@ static inline void s390_init_cpu_topology(void)
30}; 47};
31#endif 48#endif
32 49
50#define SD_BOOK_INIT SD_CPU_INIT
51
33#include <asm-generic/topology.h> 52#include <asm-generic/topology.h>
34 53
35#endif /* _ASM_S390_TOPOLOGY_H */ 54#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index bcef00766a64..13559c993847 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -57,8 +57,8 @@ struct tl_info {
57 union tl_entry tle[0]; 57 union tl_entry tle[0];
58}; 58};
59 59
60struct core_info { 60struct mask_info {
61 struct core_info *next; 61 struct mask_info *next;
62 unsigned char id; 62 unsigned char id;
63 cpumask_t mask; 63 cpumask_t mask;
64}; 64};
@@ -66,7 +66,6 @@ struct core_info {
66static int topology_enabled; 66static int topology_enabled;
67static void topology_work_fn(struct work_struct *work); 67static void topology_work_fn(struct work_struct *work);
68static struct tl_info *tl_info; 68static struct tl_info *tl_info;
69static struct core_info core_info;
70static int machine_has_topology; 69static int machine_has_topology;
71static struct timer_list topology_timer; 70static struct timer_list topology_timer;
72static void set_topology_timer(void); 71static void set_topology_timer(void);
@@ -74,38 +73,37 @@ static DECLARE_WORK(topology_work, topology_work_fn);
74/* topology_lock protects the core linked list */ 73/* topology_lock protects the core linked list */
75static DEFINE_SPINLOCK(topology_lock); 74static DEFINE_SPINLOCK(topology_lock);
76 75
76static struct mask_info core_info;
77cpumask_t cpu_core_map[NR_CPUS]; 77cpumask_t cpu_core_map[NR_CPUS];
78unsigned char cpu_core_id[NR_CPUS]; 78unsigned char cpu_core_id[NR_CPUS];
79 79
80static cpumask_t cpu_coregroup_map(unsigned int cpu) 80#ifdef CONFIG_SCHED_BOOK
81static struct mask_info book_info;
82cpumask_t cpu_book_map[NR_CPUS];
83unsigned char cpu_book_id[NR_CPUS];
84#endif
85
86static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
81{ 87{
82 struct core_info *core = &core_info;
83 unsigned long flags;
84 cpumask_t mask; 88 cpumask_t mask;
85 89
86 cpus_clear(mask); 90 cpus_clear(mask);
87 if (!topology_enabled || !machine_has_topology) 91 if (!topology_enabled || !machine_has_topology)
88 return cpu_possible_map; 92 return cpu_possible_map;
89 spin_lock_irqsave(&topology_lock, flags); 93 while (info) {
90 while (core) { 94 if (cpu_isset(cpu, info->mask)) {
91 if (cpu_isset(cpu, core->mask)) { 95 mask = info->mask;
92 mask = core->mask;
93 break; 96 break;
94 } 97 }
95 core = core->next; 98 info = info->next;
96 } 99 }
97 spin_unlock_irqrestore(&topology_lock, flags);
98 if (cpus_empty(mask)) 100 if (cpus_empty(mask))
99 mask = cpumask_of_cpu(cpu); 101 mask = cpumask_of_cpu(cpu);
100 return mask; 102 return mask;
101} 103}
102 104
103const struct cpumask *cpu_coregroup_mask(unsigned int cpu) 105static void add_cpus_to_mask(struct tl_cpu *tl_cpu, struct mask_info *book,
104{ 106 struct mask_info *core)
105 return &cpu_core_map[cpu];
106}
107
108static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
109{ 107{
110 unsigned int cpu; 108 unsigned int cpu;
111 109
@@ -117,23 +115,35 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
117 115
118 rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin; 116 rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
119 for_each_present_cpu(lcpu) { 117 for_each_present_cpu(lcpu) {
120 if (cpu_logical_map(lcpu) == rcpu) { 118 if (cpu_logical_map(lcpu) != rcpu)
121 cpu_set(lcpu, core->mask); 119 continue;
122 cpu_core_id[lcpu] = core->id; 120#ifdef CONFIG_SCHED_BOOK
123 smp_cpu_polarization[lcpu] = tl_cpu->pp; 121 cpu_set(lcpu, book->mask);
124 } 122 cpu_book_id[lcpu] = book->id;
123#endif
124 cpu_set(lcpu, core->mask);
125 cpu_core_id[lcpu] = core->id;
126 smp_cpu_polarization[lcpu] = tl_cpu->pp;
125 } 127 }
126 } 128 }
127} 129}
128 130
129static void clear_cores(void) 131static void clear_masks(void)
130{ 132{
131 struct core_info *core = &core_info; 133 struct mask_info *info;
132 134
133 while (core) { 135 info = &core_info;
134 cpus_clear(core->mask); 136 while (info) {
135 core = core->next; 137 cpus_clear(info->mask);
138 info = info->next;
139 }
140#ifdef CONFIG_SCHED_BOOK
141 info = &book_info;
142 while (info) {
143 cpus_clear(info->mask);
144 info = info->next;
136 } 145 }
146#endif
137} 147}
138 148
139static union tl_entry *next_tle(union tl_entry *tle) 149static union tl_entry *next_tle(union tl_entry *tle)
@@ -146,29 +156,36 @@ static union tl_entry *next_tle(union tl_entry *tle)
146 156
147static void tl_to_cores(struct tl_info *info) 157static void tl_to_cores(struct tl_info *info)
148{ 158{
159#ifdef CONFIG_SCHED_BOOK
160 struct mask_info *book = &book_info;
161#else
162 struct mask_info *book = NULL;
163#endif
164 struct mask_info *core = &core_info;
149 union tl_entry *tle, *end; 165 union tl_entry *tle, *end;
150 struct core_info *core = &core_info; 166
151 167
152 spin_lock_irq(&topology_lock); 168 spin_lock_irq(&topology_lock);
153 clear_cores(); 169 clear_masks();
154 tle = info->tle; 170 tle = info->tle;
155 end = (union tl_entry *)((unsigned long)info + info->length); 171 end = (union tl_entry *)((unsigned long)info + info->length);
156 while (tle < end) { 172 while (tle < end) {
157 switch (tle->nl) { 173 switch (tle->nl) {
158 case 5: 174#ifdef CONFIG_SCHED_BOOK
159 case 4:
160 case 3:
161 case 2: 175 case 2:
176 book = book->next;
177 book->id = tle->container.id;
162 break; 178 break;
179#endif
163 case 1: 180 case 1:
164 core = core->next; 181 core = core->next;
165 core->id = tle->container.id; 182 core->id = tle->container.id;
166 break; 183 break;
167 case 0: 184 case 0:
168 add_cpus_to_core(&tle->cpu, core); 185 add_cpus_to_mask(&tle->cpu, book, core);
169 break; 186 break;
170 default: 187 default:
171 clear_cores(); 188 clear_masks();
172 machine_has_topology = 0; 189 machine_has_topology = 0;
173 goto out; 190 goto out;
174 } 191 }
@@ -221,10 +238,29 @@ int topology_set_cpu_management(int fc)
221 238
222static void update_cpu_core_map(void) 239static void update_cpu_core_map(void)
223{ 240{
241 unsigned long flags;
224 int cpu; 242 int cpu;
225 243
226 for_each_possible_cpu(cpu) 244 spin_lock_irqsave(&topology_lock, flags);
227 cpu_core_map[cpu] = cpu_coregroup_map(cpu); 245 for_each_possible_cpu(cpu) {
246 cpu_core_map[cpu] = cpu_group_map(&core_info, cpu);
247#ifdef CONFIG_SCHED_BOOK
248 cpu_book_map[cpu] = cpu_group_map(&book_info, cpu);
249#endif
250 }
251 spin_unlock_irqrestore(&topology_lock, flags);
252}
253
254static void store_topology(struct tl_info *info)
255{
256#ifdef CONFIG_SCHED_BOOK
257 int rc;
258
259 rc = stsi(info, 15, 1, 3);
260 if (rc != -ENOSYS)
261 return;
262#endif
263 stsi(info, 15, 1, 2);
228} 264}
229 265
230int arch_update_cpu_topology(void) 266int arch_update_cpu_topology(void)
@@ -238,7 +274,7 @@ int arch_update_cpu_topology(void)
238 topology_update_polarization_simple(); 274 topology_update_polarization_simple();
239 return 0; 275 return 0;
240 } 276 }
241 stsi(info, 15, 1, 2); 277 store_topology(info);
242 tl_to_cores(info); 278 tl_to_cores(info);
243 update_cpu_core_map(); 279 update_cpu_core_map();
244 for_each_online_cpu(cpu) { 280 for_each_online_cpu(cpu) {
@@ -299,12 +335,24 @@ out:
299} 335}
300__initcall(init_topology_update); 336__initcall(init_topology_update);
301 337
338static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset)
339{
340 int i, nr_masks;
341
342 nr_masks = info->mag[NR_MAG - offset];
343 for (i = 0; i < info->mnest - offset; i++)
344 nr_masks *= info->mag[NR_MAG - offset - 1 - i];
345 nr_masks = max(nr_masks, 1);
346 for (i = 0; i < nr_masks; i++) {
347 mask->next = alloc_bootmem(sizeof(struct mask_info));
348 mask = mask->next;
349 }
350}
351
302void __init s390_init_cpu_topology(void) 352void __init s390_init_cpu_topology(void)
303{ 353{
304 unsigned long long facility_bits; 354 unsigned long long facility_bits;
305 struct tl_info *info; 355 struct tl_info *info;
306 struct core_info *core;
307 int nr_cores;
308 int i; 356 int i;
309 357
310 if (stfle(&facility_bits, 1) <= 0) 358 if (stfle(&facility_bits, 1) <= 0)
@@ -315,25 +363,13 @@ void __init s390_init_cpu_topology(void)
315 363
316 tl_info = alloc_bootmem_pages(PAGE_SIZE); 364 tl_info = alloc_bootmem_pages(PAGE_SIZE);
317 info = tl_info; 365 info = tl_info;
318 stsi(info, 15, 1, 2); 366 store_topology(info);
319
320 nr_cores = info->mag[NR_MAG - 2];
321 for (i = 0; i < info->mnest - 2; i++)
322 nr_cores *= info->mag[NR_MAG - 3 - i];
323
324 pr_info("The CPU configuration topology of the machine is:"); 367 pr_info("The CPU configuration topology of the machine is:");
325 for (i = 0; i < NR_MAG; i++) 368 for (i = 0; i < NR_MAG; i++)
326 printk(" %d", info->mag[i]); 369 printk(" %d", info->mag[i]);
327 printk(" / %d\n", info->mnest); 370 printk(" / %d\n", info->mnest);
328 371 alloc_masks(info, &core_info, 2);
329 core = &core_info; 372#ifdef CONFIG_SCHED_BOOK
330 for (i = 0; i < nr_cores; i++) { 373 alloc_masks(info, &book_info, 3);
331 core->next = alloc_bootmem(sizeof(struct core_info)); 374#endif
332 core = core->next;
333 if (!core)
334 goto error;
335 }
336 return;
337error:
338 machine_has_topology = 0;
339} 375}
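
In the s390 topology rework above, core and (optionally) book groupings are kept as linked lists of mask_info, and cpu_group_map() walks the list to find the group containing a CPU, falling back to a single-CPU mask when nothing matches. A userspace model of that lookup using a plain 64-bit mask; the list walk and the fallback mirror the hunk, the types are simplified.

#include <stdio.h>
#include <stdint.h>

struct mask_info {
	struct mask_info *next;
	unsigned char id;
	uint64_t mask;            /* bit n set => cpu n belongs to this group */
};

static uint64_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
	while (info) {
		if (info->mask & (1ULL << cpu))
			return info->mask;        /* whole group the cpu sits in */
		info = info->next;
	}
	return 1ULL << cpu;                       /* fallback: the cpu by itself */
}

int main(void)
{
	/* two "books": cpus 0-3 and cpus 4-7 */
	struct mask_info book1 = { .next = NULL,   .id = 1, .mask = 0xf0 };
	struct mask_info book0 = { .next = &book1, .id = 0, .mask = 0x0f };

	printf("cpu 2 -> group %#llx\n", (unsigned long long)cpu_group_map(&book0, 2));
	printf("cpu 6 -> group %#llx\n", (unsigned long long)cpu_group_map(&book0, 6));
	printf("cpu 9 -> group %#llx\n", (unsigned long long)cpu_group_map(&book0, 9));
	return 0;
}
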
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 33990fa95af0..35b6879628a0 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -16,6 +16,7 @@ config SUPERH
16 select HAVE_ARCH_TRACEHOOK 16 select HAVE_ARCH_TRACEHOOK
17 select HAVE_DMA_API_DEBUG 17 select HAVE_DMA_API_DEBUG
18 select HAVE_DMA_ATTRS 18 select HAVE_DMA_ATTRS
19 select HAVE_IRQ_WORK
19 select HAVE_PERF_EVENTS 20 select HAVE_PERF_EVENTS
20 select PERF_USE_VMALLOC 21 select PERF_USE_VMALLOC
21 select HAVE_KERNEL_GZIP 22 select HAVE_KERNEL_GZIP
@@ -249,6 +250,11 @@ config ARCH_SHMOBILE
249 select PM 250 select PM
250 select PM_RUNTIME 251 select PM_RUNTIME
251 252
253config CPU_HAS_PMU
254 depends on CPU_SH4 || CPU_SH4A
255 default y
256 bool
257
252if SUPERH32 258if SUPERH32
253 259
254choice 260choice
@@ -738,6 +744,14 @@ config GUSA_RB
738 LLSC, this should be more efficient than the other alternative of 744 LLSC, this should be more efficient than the other alternative of
739 disabling interrupts around the atomic sequence. 745 disabling interrupts around the atomic sequence.
740 746
747config HW_PERF_EVENTS
748 bool "Enable hardware performance counter support for perf events"
749 depends on PERF_EVENTS && CPU_HAS_PMU
750 default y
751 help
752 Enable hardware performance counter support for perf events. If
753 disabled, perf events will use software events only.
754
741source "drivers/sh/Kconfig" 755source "drivers/sh/Kconfig"
742 756
743endmenu 757endmenu
diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h
index 3d0c9f36d150..14308bed7ea5 100644
--- a/arch/sh/include/asm/perf_event.h
+++ b/arch/sh/include/asm/perf_event.h
@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *);
26extern int reserve_pmc_hardware(void); 26extern int reserve_pmc_hardware(void);
27extern void release_pmc_hardware(void); 27extern void release_pmc_hardware(void);
28 28
29static inline void set_perf_event_pending(void)
30{
31 /* Nothing to see here, move along. */
32}
33
34#define PERF_EVENT_INDEX_OFFSET 0
35
36#endif /* __ASM_SH_PERF_EVENT_H */ 29#endif /* __ASM_SH_PERF_EVENT_H */
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 257de1f0692b..ae5bac39b896 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -290,7 +290,7 @@ void __init init_IRQ(void)
290int __init arch_probe_nr_irqs(void) 290int __init arch_probe_nr_irqs(void)
291{ 291{
292 nr_irqs = sh_mv.mv_nr_irqs; 292 nr_irqs = sh_mv.mv_nr_irqs;
293 return 0; 293 return NR_IRQS_LEGACY;
294} 294}
295#endif 295#endif
296 296
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index a9dd3abde28e..d5ca1ef50fa9 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -14,11 +14,6 @@
14#include <asm/unwinder.h> 14#include <asm/unwinder.h>
15#include <asm/ptrace.h> 15#include <asm/ptrace.h>
16 16
17static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
18{
19 if (entry->nr < PERF_MAX_STACK_DEPTH)
20 entry->ip[entry->nr++] = ip;
21}
22 17
23static void callchain_warning(void *data, char *msg) 18static void callchain_warning(void *data, char *msg)
24{ 19{
@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
39 struct perf_callchain_entry *entry = data; 34 struct perf_callchain_entry *entry = data;
40 35
41 if (reliable) 36 if (reliable)
42 callchain_store(entry, addr); 37 perf_callchain_store(entry, addr);
43} 38}
44 39
45static const struct stacktrace_ops callchain_ops = { 40static const struct stacktrace_ops callchain_ops = {
@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
49 .address = callchain_address, 44 .address = callchain_address,
50}; 45};
51 46
52static void 47void
53perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 48perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
54{ 49{
55 callchain_store(entry, PERF_CONTEXT_KERNEL); 50 perf_callchain_store(entry, regs->pc);
56 callchain_store(entry, regs->pc);
57 51
58 unwind_stack(NULL, regs, NULL, &callchain_ops, entry); 52 unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
59} 53}
60
61static void
62perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
63{
64 int is_user;
65
66 if (!regs)
67 return;
68
69 is_user = user_mode(regs);
70
71 if (is_user && current->state != TASK_RUNNING)
72 return;
73
74 /*
75 * Only the kernel side is implemented for now.
76 */
77 if (!is_user)
78 perf_callchain_kernel(regs, entry);
79}
80
81/*
82 * No need for separate IRQ and NMI entries.
83 */
84static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
85
86struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
87{
88 struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
89
90 entry->nr = 0;
91
92 perf_do_callchain(regs, entry);
93
94 return entry;
95}
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 7a3dc3567258..5a4b33435650 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void)
59 return !!sh_pmu; 59 return !!sh_pmu;
60} 60}
61 61
62const char *perf_pmu_name(void)
63{
64 if (!sh_pmu)
65 return NULL;
66
67 return sh_pmu->name;
68}
69EXPORT_SYMBOL_GPL(perf_pmu_name);
70
71int perf_num_counters(void)
72{
73 if (!sh_pmu)
74 return 0;
75
76 return sh_pmu->num_events;
77}
78EXPORT_SYMBOL_GPL(perf_num_counters);
79
62/* 80/*
63 * Release the PMU if this is the last perf_event. 81 * Release the PMU if this is the last perf_event.
64 */ 82 */
@@ -206,50 +224,80 @@ again:
206 local64_add(delta, &event->count); 224 local64_add(delta, &event->count);
207} 225}
208 226
209static void sh_pmu_disable(struct perf_event *event) 227static void sh_pmu_stop(struct perf_event *event, int flags)
210{ 228{
211 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 229 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
212 struct hw_perf_event *hwc = &event->hw; 230 struct hw_perf_event *hwc = &event->hw;
213 int idx = hwc->idx; 231 int idx = hwc->idx;
214 232
215 clear_bit(idx, cpuc->active_mask); 233 if (!(event->hw.state & PERF_HES_STOPPED)) {
216 sh_pmu->disable(hwc, idx); 234 sh_pmu->disable(hwc, idx);
235 cpuc->events[idx] = NULL;
236 event->hw.state |= PERF_HES_STOPPED;
237 }
238
239 if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
240 sh_perf_event_update(event, &event->hw, idx);
241 event->hw.state |= PERF_HES_UPTODATE;
242 }
243}
244
245static void sh_pmu_start(struct perf_event *event, int flags)
246{
247 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
248 struct hw_perf_event *hwc = &event->hw;
249 int idx = hwc->idx;
250
251 if (WARN_ON_ONCE(idx == -1))
252 return;
253
254 if (flags & PERF_EF_RELOAD)
255 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
217 256
218 barrier(); 257 cpuc->events[idx] = event;
258 event->hw.state = 0;
259 sh_pmu->enable(hwc, idx);
260}
219 261
220 sh_perf_event_update(event, &event->hw, idx); 262static void sh_pmu_del(struct perf_event *event, int flags)
263{
264 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
221 265
222 cpuc->events[idx] = NULL; 266 sh_pmu_stop(event, PERF_EF_UPDATE);
223 clear_bit(idx, cpuc->used_mask); 267 __clear_bit(event->hw.idx, cpuc->used_mask);
224 268
225 perf_event_update_userpage(event); 269 perf_event_update_userpage(event);
226} 270}
227 271
228static int sh_pmu_enable(struct perf_event *event) 272static int sh_pmu_add(struct perf_event *event, int flags)
229{ 273{
230 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 274 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
231 struct hw_perf_event *hwc = &event->hw; 275 struct hw_perf_event *hwc = &event->hw;
232 int idx = hwc->idx; 276 int idx = hwc->idx;
277 int ret = -EAGAIN;
278
279 perf_pmu_disable(event->pmu);
233 280
234 if (test_and_set_bit(idx, cpuc->used_mask)) { 281 if (__test_and_set_bit(idx, cpuc->used_mask)) {
235 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); 282 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
236 if (idx == sh_pmu->num_events) 283 if (idx == sh_pmu->num_events)
237 return -EAGAIN; 284 goto out;
238 285
239 set_bit(idx, cpuc->used_mask); 286 __set_bit(idx, cpuc->used_mask);
240 hwc->idx = idx; 287 hwc->idx = idx;
241 } 288 }
242 289
243 sh_pmu->disable(hwc, idx); 290 sh_pmu->disable(hwc, idx);
244 291
245 cpuc->events[idx] = event; 292 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
246 set_bit(idx, cpuc->active_mask); 293 if (flags & PERF_EF_START)
247 294 sh_pmu_start(event, PERF_EF_RELOAD);
248 sh_pmu->enable(hwc, idx);
249 295
250 perf_event_update_userpage(event); 296 perf_event_update_userpage(event);
251 297 ret = 0;
252 return 0; 298out:
299 perf_pmu_enable(event->pmu);
300 return ret;
253} 301}
254 302
255static void sh_pmu_read(struct perf_event *event) 303static void sh_pmu_read(struct perf_event *event)
@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event)
257 sh_perf_event_update(event, &event->hw, event->hw.idx); 305 sh_perf_event_update(event, &event->hw, event->hw.idx);
258} 306}
259 307
260static const struct pmu pmu = { 308static int sh_pmu_event_init(struct perf_event *event)
261 .enable = sh_pmu_enable,
262 .disable = sh_pmu_disable,
263 .read = sh_pmu_read,
264};
265
266const struct pmu *hw_perf_event_init(struct perf_event *event)
267{ 309{
268 int err = __hw_perf_event_init(event); 310 int err;
311
312 switch (event->attr.type) {
313 case PERF_TYPE_RAW:
314 case PERF_TYPE_HW_CACHE:
315 case PERF_TYPE_HARDWARE:
316 err = __hw_perf_event_init(event);
317 break;
318
319 default:
320 return -ENOENT;
321 }
322
269 if (unlikely(err)) { 323 if (unlikely(err)) {
270 if (event->destroy) 324 if (event->destroy)
271 event->destroy(event); 325 event->destroy(event);
272 return ERR_PTR(err);
273 } 326 }
274 327
275 return &pmu; 328 return err;
329}
330
331static void sh_pmu_enable(struct pmu *pmu)
332{
333 if (!sh_pmu_initialized())
334 return;
335
336 sh_pmu->enable_all();
337}
338
339static void sh_pmu_disable(struct pmu *pmu)
340{
341 if (!sh_pmu_initialized())
342 return;
343
344 sh_pmu->disable_all();
276} 345}
277 346
347static struct pmu pmu = {
348 .pmu_enable = sh_pmu_enable,
349 .pmu_disable = sh_pmu_disable,
350 .event_init = sh_pmu_event_init,
351 .add = sh_pmu_add,
352 .del = sh_pmu_del,
353 .start = sh_pmu_start,
354 .stop = sh_pmu_stop,
355 .read = sh_pmu_read,
356};
357
278static void sh_pmu_setup(int cpu) 358static void sh_pmu_setup(int cpu)
279{ 359{
280 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 360 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
299 return NOTIFY_OK; 379 return NOTIFY_OK;
300} 380}
301 381
302void hw_perf_enable(void) 382int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
303{
304 if (!sh_pmu_initialized())
305 return;
306
307 sh_pmu->enable_all();
308}
309
310void hw_perf_disable(void)
311{
312 if (!sh_pmu_initialized())
313 return;
314
315 sh_pmu->disable_all();
316}
317
318int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
319{ 383{
320 if (sh_pmu) 384 if (sh_pmu)
321 return -EBUSY; 385 return -EBUSY;
322 sh_pmu = pmu; 386 sh_pmu = _pmu;
323 387
324 pr_info("Performance Events: %s support registered\n", pmu->name); 388 pr_info("Performance Events: %s support registered\n", _pmu->name);
325 389
326 WARN_ON(pmu->num_events > MAX_HWEVENTS); 390 WARN_ON(_pmu->num_events > MAX_HWEVENTS);
327 391
392 perf_pmu_register(&pmu);
328 perf_cpu_notifier(sh_pmu_notifier); 393 perf_cpu_notifier(sh_pmu_notifier);
329 return 0; 394 return 0;
330} 395}
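
sh_pmu_add() above claims a counter slot with a test-and-set on used_mask and falls back to find_first_zero_bit when the event's preferred index is already taken. A small userspace sketch of that slot allocation; a plain unsigned long replaces the kernel bitmap helpers and the names are illustrative.

#include <stdio.h>

#define NUM_COUNTERS 4

static unsigned long used_mask;   /* bit i set => counter i is busy */

static int claim_counter(int preferred)
{
	int idx = preferred;

	if (used_mask & (1UL << idx)) {
		/* preferred slot is taken: scan for the first free one */
		for (idx = 0; idx < NUM_COUNTERS; idx++)
			if (!(used_mask & (1UL << idx)))
				break;
		if (idx == NUM_COUNTERS)
			return -1;                /* -EAGAIN in the kernel version */
	}
	used_mask |= 1UL << idx;
	return idx;
}

int main(void)
{
	printf("first event  -> counter %d\n", claim_counter(0));
	printf("second event -> counter %d\n", claim_counter(0));   /* 0 busy, gets 1 */
	printf("third event  -> counter %d\n", claim_counter(3));
	return 0;
}
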
diff --git a/arch/sh/oprofile/Makefile b/arch/sh/oprofile/Makefile
index 4886c5c1786c..e85aae73e3dc 100644
--- a/arch/sh/oprofile/Makefile
+++ b/arch/sh/oprofile/Makefile
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
6 oprofilefs.o oprofile_stats.o \ 6 oprofilefs.o oprofile_stats.o \
7 timer_int.o ) 7 timer_int.o )
8 8
9ifeq ($(CONFIG_HW_PERF_EVENTS),y)
10DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
11endif
12
9oprofile-y := $(DRIVER_OBJS) common.o backtrace.o 13oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c
index ac604937f3ee..e10d89376f9b 100644
--- a/arch/sh/oprofile/common.c
+++ b/arch/sh/oprofile/common.c
@@ -17,114 +17,45 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/perf_event.h>
20#include <asm/processor.h> 21#include <asm/processor.h>
21#include "op_impl.h"
22
23static struct op_sh_model *model;
24
25static struct op_counter_config ctr[20];
26 22
23#ifdef CONFIG_HW_PERF_EVENTS
27extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); 24extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
28 25
29static int op_sh_setup(void) 26char *op_name_from_perf_id(void)
30{
31 /* Pre-compute the values to stuff in the hardware registers. */
32 model->reg_setup(ctr);
33
34 /* Configure the registers on all cpus. */
35 on_each_cpu(model->cpu_setup, NULL, 1);
36
37 return 0;
38}
39
40static int op_sh_create_files(struct super_block *sb, struct dentry *root)
41{ 27{
42 int i, ret = 0; 28 const char *pmu;
29 char buf[20];
30 int size;
43 31
44 for (i = 0; i < model->num_counters; i++) { 32 pmu = perf_pmu_name();
45 struct dentry *dir; 33 if (!pmu)
46 char buf[4]; 34 return NULL;
47 35
48 snprintf(buf, sizeof(buf), "%d", i); 36 size = snprintf(buf, sizeof(buf), "sh/%s", pmu);
49 dir = oprofilefs_mkdir(sb, root, buf); 37 if (size > -1 && size < sizeof(buf))
38 return buf;
50 39
51 ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); 40 return NULL;
52 ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
53 ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
54 ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
55
56 if (model->create_files)
57 ret |= model->create_files(sb, dir);
58 else
59 ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
60
61 /* Dummy entries */
62 ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
63 }
64
65 return ret;
66} 41}
67 42
68static int op_sh_start(void) 43int __init oprofile_arch_init(struct oprofile_operations *ops)
69{ 44{
70 /* Enable performance monitoring for all counters. */ 45 ops->backtrace = sh_backtrace;
71 on_each_cpu(model->cpu_start, NULL, 1);
72 46
73 return 0; 47 return oprofile_perf_init(ops);
74} 48}
75 49
76static void op_sh_stop(void) 50void __exit oprofile_arch_exit(void)
77{ 51{
78 /* Disable performance monitoring for all counters. */ 52 oprofile_perf_exit();
79 on_each_cpu(model->cpu_stop, NULL, 1);
80} 53}
81 54#else
82int __init oprofile_arch_init(struct oprofile_operations *ops) 55int __init oprofile_arch_init(struct oprofile_operations *ops)
83{ 56{
84 struct op_sh_model *lmodel = NULL; 57 pr_info("oprofile: hardware counters not available\n");
85 int ret; 58 return -ENODEV;
86
87 /*
88 * Always assign the backtrace op. If the counter initialization
89 * fails, we fall back to the timer which will still make use of
90 * this.
91 */
92 ops->backtrace = sh_backtrace;
93
94 /*
95 * XXX
96 *
97 * All of the SH7750/SH-4A counters have been converted to perf,
98 * this infrastructure hook is left for other users until they've
99 * had a chance to convert over, at which point all of this
100 * will be deleted.
101 */
102
103 if (!lmodel)
104 return -ENODEV;
105 if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER))
106 return -ENODEV;
107
108 ret = lmodel->init();
109 if (unlikely(ret != 0))
110 return ret;
111
112 model = lmodel;
113
114 ops->setup = op_sh_setup;
115 ops->create_files = op_sh_create_files;
116 ops->start = op_sh_start;
117 ops->stop = op_sh_stop;
118 ops->cpu_type = lmodel->cpu_type;
119
120 printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
121 lmodel->cpu_type);
122
123 return 0;
124}
125
126void oprofile_arch_exit(void)
127{
128 if (model && model->exit)
129 model->exit();
130} 59}
60void __exit oprofile_arch_exit(void) {}
61#endif /* CONFIG_HW_PERF_EVENTS */
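With the counter-model code gone, the sh oprofile layer is reduced to naming the PMU and delegating everything else to the shared oprofile_perf backend: op_name_from_perf_id() builds the "sh/<pmu>" cpu_type string from perf_pmu_name(), and oprofile_arch_init() only wires up sh_backtrace before calling oprofile_perf_init(). One thing worth noting in the hunk above is that buf is a function-local array, so the returned pointer is only valid while that stack frame is live; a minimal sketch of the same helper with a static buffer (not the literal upstream code) avoids that:

	char *op_name_from_perf_id(void)
	{
		static char buf[32];		/* survives the call, unlike a stack array */
		const char *pmu = perf_pmu_name();

		if (!pmu)
			return NULL;

		snprintf(buf, sizeof(buf), "sh/%s", pmu);
		return buf;
	}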
diff --git a/arch/sh/oprofile/op_impl.h b/arch/sh/oprofile/op_impl.h
deleted file mode 100644
index 1244479ceb29..000000000000
--- a/arch/sh/oprofile/op_impl.h
+++ /dev/null
@@ -1,33 +0,0 @@
1#ifndef __OP_IMPL_H
2#define __OP_IMPL_H
3
4/* Per-counter configuration as set via oprofilefs. */
5struct op_counter_config {
6 unsigned long enabled;
7 unsigned long event;
8
9 unsigned long count;
10
11 /* Dummy values for userspace tool compliance */
12 unsigned long kernel;
13 unsigned long user;
14 unsigned long unit_mask;
15};
16
17/* Per-architecture configury and hooks. */
18struct op_sh_model {
19 void (*reg_setup)(struct op_counter_config *);
20 int (*create_files)(struct super_block *sb, struct dentry *dir);
21 void (*cpu_setup)(void *dummy);
22 int (*init)(void);
23 void (*exit)(void);
24 void (*cpu_start)(void *args);
25 void (*cpu_stop)(void *args);
26 char *cpu_type;
27 unsigned char num_counters;
28};
29
30/* arch/sh/oprofile/common.c */
31extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
32
33#endif /* __OP_IMPL_H */
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 491e9d6de191..3e9d31401fb2 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -26,10 +26,12 @@ config SPARC
26 select ARCH_WANT_OPTIONAL_GPIOLIB 26 select ARCH_WANT_OPTIONAL_GPIOLIB
27 select RTC_CLASS 27 select RTC_CLASS
28 select RTC_DRV_M48T59 28 select RTC_DRV_M48T59
29 select HAVE_IRQ_WORK
29 select HAVE_PERF_EVENTS 30 select HAVE_PERF_EVENTS
30 select PERF_USE_VMALLOC 31 select PERF_USE_VMALLOC
31 select HAVE_DMA_ATTRS 32 select HAVE_DMA_ATTRS
32 select HAVE_DMA_API_DEBUG 33 select HAVE_DMA_API_DEBUG
34 select HAVE_ARCH_JUMP_LABEL
33 35
34config SPARC32 36config SPARC32
35 def_bool !64BIT 37 def_bool !64BIT
@@ -53,6 +55,7 @@ config SPARC64
53 select RTC_DRV_BQ4802 55 select RTC_DRV_BQ4802
54 select RTC_DRV_SUN4V 56 select RTC_DRV_SUN4V
55 select RTC_DRV_STARFIRE 57 select RTC_DRV_STARFIRE
58 select HAVE_IRQ_WORK
56 select HAVE_PERF_EVENTS 59 select HAVE_PERF_EVENTS
57 select PERF_USE_VMALLOC 60 select PERF_USE_VMALLOC
58 61
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
new file mode 100644
index 000000000000..62e66d7b2fb6
--- /dev/null
+++ b/arch/sparc/include/asm/jump_label.h
@@ -0,0 +1,32 @@
1#ifndef _ASM_SPARC_JUMP_LABEL_H
2#define _ASM_SPARC_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/system.h>
8
9#define JUMP_LABEL_NOP_SIZE 4
10
11#define JUMP_LABEL(key, label) \
12 do { \
13 asm goto("1:\n\t" \
14 "nop\n\t" \
15 "nop\n\t" \
16 ".pushsection __jump_table, \"a\"\n\t"\
17 ".word 1b, %l[" #label "], %c0\n\t" \
18 ".popsection \n\t" \
19 : : "i" (key) : : label);\
20 } while (0)
21
22#endif /* __KERNEL__ */
23
24typedef u32 jump_label_t;
25
26struct jump_entry {
27 jump_label_t code;
28 jump_label_t target;
29 jump_label_t key;
30};
31
32#endif
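The JUMP_LABEL() macro above plants nops at the call site and records the site address, the target label and the key in the __jump_table section so the runtime can later patch in a branch. A hedged usage sketch, assuming a caller-defined static key; the function and variable names are illustrative, not part of the patch:

	static int trace_enabled;		/* key: any object with a link-time constant address */

	static void maybe_trace(void)
	{
		JUMP_LABEL(&trace_enabled, do_trace);
		return;				/* fast path: the nops fall through */
	do_trace:
		printk(KERN_DEBUG "tracepoint hit\n");	/* slow path, reached only once patched */
	}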
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 727af70646cb..6e8bfa1786da 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -1,10 +1,6 @@
1#ifndef __ASM_SPARC_PERF_EVENT_H 1#ifndef __ASM_SPARC_PERF_EVENT_H
2#define __ASM_SPARC_PERF_EVENT_H 2#define __ASM_SPARC_PERF_EVENT_H
3 3
4extern void set_perf_event_pending(void);
5
6#define PERF_EVENT_INDEX_OFFSET 0
7
8#ifdef CONFIG_PERF_EVENTS 4#ifdef CONFIG_PERF_EVENTS
9#include <asm/ptrace.h> 5#include <asm/ptrace.h>
10 6
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 0c2dc1f24a9a..599398fbbc7c 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y)
119 119
120pc--$(CONFIG_PERF_EVENTS) := perf_event.o 120pc--$(CONFIG_PERF_EVENTS) := perf_event.o
121obj-$(CONFIG_SPARC64) += $(pc--y) 121obj-$(CONFIG_SPARC64) += $(pc--y)
122
123obj-$(CONFIG_SPARC64) += jump_label.o
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
new file mode 100644
index 000000000000..ea2dafc93d78
--- /dev/null
+++ b/arch/sparc/kernel/jump_label.c
@@ -0,0 +1,47 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/mutex.h>
4#include <linux/cpu.h>
5
6#include <linux/jump_label.h>
7#include <linux/memory.h>
8
9#ifdef HAVE_JUMP_LABEL
10
11void arch_jump_label_transform(struct jump_entry *entry,
12 enum jump_label_type type)
13{
14 u32 val;
15 u32 *insn = (u32 *) (unsigned long) entry->code;
16
17 if (type == JUMP_LABEL_ENABLE) {
18 s32 off = (s32)entry->target - (s32)entry->code;
19
20#ifdef CONFIG_SPARC64
21 /* ba,pt %xcc, . + (off << 2) */
22 val = 0x10680000 | ((u32) off >> 2);
23#else
24 /* ba . + (off << 2) */
25 val = 0x10800000 | ((u32) off >> 2);
26#endif
27 } else {
28 val = 0x01000000;
29 }
30
31 get_online_cpus();
32 mutex_lock(&text_mutex);
33 *insn = val;
34 flushi(insn);
35 mutex_unlock(&text_mutex);
36 put_online_cpus();
37}
38
39void arch_jump_label_text_poke_early(jump_label_t addr)
40{
41 u32 *insn_p = (u32 *) (unsigned long) addr;
42
43 *insn_p = 0x01000000;
44 flushi(insn_p);
45}
46
47#endif
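As a worked example of the encoding in arch_jump_label_transform(): enabling a site whose target lies 0x40 bytes ahead gives off = 0x40, the 19-bit displacement field receives 0x40 >> 2 = 0x10 (branch displacements count instruction words), and on sparc64 the written word is 0x10680000 | 0x10 = 0x10680010, i.e. "ba,pt %xcc, . + 0x40". Disabling a site simply writes 0x01000000, the sparc nop, which is also what arch_jump_label_text_poke_early() installs at boot.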
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index f848aadf54dc..ee3c7dde8d9f 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -18,6 +18,9 @@
18#include <asm/spitfire.h> 18#include <asm/spitfire.h>
19 19
20#ifdef CONFIG_SPARC64 20#ifdef CONFIG_SPARC64
21
22#include <linux/jump_label.h>
23
21static void *module_map(unsigned long size) 24static void *module_map(unsigned long size)
22{ 25{
23 struct vm_struct *area; 26 struct vm_struct *area;
@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
227 const Elf_Shdr *sechdrs, 230 const Elf_Shdr *sechdrs,
228 struct module *me) 231 struct module *me)
229{ 232{
233 /* make jump label nops */
234 jump_label_apply_nops(me);
235
230 /* Cheetah's I-cache is fully coherent. */ 236 /* Cheetah's I-cache is fully coherent. */
231 if (tlb_type == spitfire) { 237 if (tlb_type == spitfire) {
232 unsigned long va; 238 unsigned long va;
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 548b8ca9c210..b210416ace7b 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -114,10 +114,10 @@ static void free_msi(struct pci_pbm_info *pbm, int msi_num)
114 114
115static struct irq_chip msi_irq = { 115static struct irq_chip msi_irq = {
116 .name = "PCI-MSI", 116 .name = "PCI-MSI",
117 .mask = mask_msi_irq, 117 .irq_mask = mask_msi_irq,
118 .unmask = unmask_msi_irq, 118 .irq_unmask = unmask_msi_irq,
119 .enable = unmask_msi_irq, 119 .irq_enable = unmask_msi_irq,
120 .disable = mask_msi_irq, 120 .irq_disable = mask_msi_irq,
121 /* XXX affinity XXX */ 121 /* XXX affinity XXX */
122}; 122};
123 123
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index c4a6a50b4849..b87873c0e8ea 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -7,7 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/irq.h> 8#include <linux/irq.h>
9 9
10#include <linux/perf_event.h> 10#include <linux/irq_work.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12 12
13#include <asm/pil.h> 13#include <asm/pil.h>
@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
43 43
44 old_regs = set_irq_regs(regs); 44 old_regs = set_irq_regs(regs);
45 irq_enter(); 45 irq_enter();
46#ifdef CONFIG_PERF_EVENTS 46#ifdef CONFIG_IRQ_WORK
47 perf_event_do_pending(); 47 irq_work_run();
48#endif 48#endif
49 irq_exit(); 49 irq_exit();
50 set_irq_regs(old_regs); 50 set_irq_regs(old_regs);
51} 51}
52 52
53void set_perf_event_pending(void) 53void arch_irq_work_raise(void)
54{ 54{
55 set_softint(1 << PIL_DEFERRED_PCR_WORK); 55 set_softint(1 << PIL_DEFERRED_PCR_WORK);
56} 56}
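The pcr.c change swaps the perf-specific pending hook for the generic irq_work layer: arch_irq_work_raise() raises the PIL_DEFERRED_PCR_WORK soft interrupt, and the handler drains the queue with irq_work_run(). A minimal sketch of the consumer side, assuming the generic <linux/irq_work.h> interface (the names here are illustrative):

	#include <linux/irq_work.h>

	static void drain_samples(struct irq_work *work)
	{
		/* runs from deferred_pcr_work_irq() above, via irq_work_run() */
	}

	static struct irq_work sample_work = {
		.func = drain_samples,
	};

	static void pmu_overflow_hook(void)	/* hypothetical NMI-context caller */
	{
		/* safe from NMI: queues the entry and calls arch_irq_work_raise() */
		irq_work_queue(&sample_work);
	}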
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 6318e622cfb0..0d6deb55a2ae 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
658 658
659 enc = perf_event_get_enc(cpuc->events[i]); 659 enc = perf_event_get_enc(cpuc->events[i]);
660 pcr &= ~mask_for_index(idx); 660 pcr &= ~mask_for_index(idx);
661 pcr |= event_encoding(enc, idx); 661 if (hwc->state & PERF_HES_STOPPED)
662 pcr |= nop_for_index(idx);
663 else
664 pcr |= event_encoding(enc, idx);
662 } 665 }
663out: 666out:
664 return pcr; 667 return pcr;
665} 668}
666 669
667void hw_perf_enable(void) 670static void sparc_pmu_enable(struct pmu *pmu)
668{ 671{
669 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 672 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
670 u64 pcr; 673 u64 pcr;
@@ -691,7 +694,7 @@ void hw_perf_enable(void)
691 pcr_ops->write(cpuc->pcr); 694 pcr_ops->write(cpuc->pcr);
692} 695}
693 696
694void hw_perf_disable(void) 697static void sparc_pmu_disable(struct pmu *pmu)
695{ 698{
696 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 699 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
697 u64 val; 700 u64 val;
@@ -710,19 +713,65 @@ void hw_perf_disable(void)
710 pcr_ops->write(cpuc->pcr); 713 pcr_ops->write(cpuc->pcr);
711} 714}
712 715
713static void sparc_pmu_disable(struct perf_event *event) 716static int active_event_index(struct cpu_hw_events *cpuc,
717 struct perf_event *event)
718{
719 int i;
720
721 for (i = 0; i < cpuc->n_events; i++) {
722 if (cpuc->event[i] == event)
723 break;
724 }
725 BUG_ON(i == cpuc->n_events);
726 return cpuc->current_idx[i];
727}
728
729static void sparc_pmu_start(struct perf_event *event, int flags)
730{
731 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
732 int idx = active_event_index(cpuc, event);
733
734 if (flags & PERF_EF_RELOAD) {
735 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
736 sparc_perf_event_set_period(event, &event->hw, idx);
737 }
738
739 event->hw.state = 0;
740
741 sparc_pmu_enable_event(cpuc, &event->hw, idx);
742}
743
744static void sparc_pmu_stop(struct perf_event *event, int flags)
745{
746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
747 int idx = active_event_index(cpuc, event);
748
749 if (!(event->hw.state & PERF_HES_STOPPED)) {
750 sparc_pmu_disable_event(cpuc, &event->hw, idx);
751 event->hw.state |= PERF_HES_STOPPED;
752 }
753
754 if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
755 sparc_perf_event_update(event, &event->hw, idx);
756 event->hw.state |= PERF_HES_UPTODATE;
757 }
758}
759
760static void sparc_pmu_del(struct perf_event *event, int _flags)
714{ 761{
715 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 762 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
716 struct hw_perf_event *hwc = &event->hw;
717 unsigned long flags; 763 unsigned long flags;
718 int i; 764 int i;
719 765
720 local_irq_save(flags); 766 local_irq_save(flags);
721 perf_disable(); 767 perf_pmu_disable(event->pmu);
722 768
723 for (i = 0; i < cpuc->n_events; i++) { 769 for (i = 0; i < cpuc->n_events; i++) {
724 if (event == cpuc->event[i]) { 770 if (event == cpuc->event[i]) {
725 int idx = cpuc->current_idx[i]; 771 /* Absorb the final count and turn off the
772 * event.
773 */
774 sparc_pmu_stop(event, PERF_EF_UPDATE);
726 775
727 /* Shift remaining entries down into 776 /* Shift remaining entries down into
728 * the existing slot. 777 * the existing slot.
@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event)
734 cpuc->current_idx[i]; 783 cpuc->current_idx[i];
735 } 784 }
736 785
737 /* Absorb the final count and turn off the
738 * event.
739 */
740 sparc_pmu_disable_event(cpuc, hwc, idx);
741 barrier();
742 sparc_perf_event_update(event, hwc, idx);
743
744 perf_event_update_userpage(event); 786 perf_event_update_userpage(event);
745 787
746 cpuc->n_events--; 788 cpuc->n_events--;
@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event)
748 } 790 }
749 } 791 }
750 792
751 perf_enable(); 793 perf_pmu_enable(event->pmu);
752 local_irq_restore(flags); 794 local_irq_restore(flags);
753} 795}
754 796
755static int active_event_index(struct cpu_hw_events *cpuc,
756 struct perf_event *event)
757{
758 int i;
759
760 for (i = 0; i < cpuc->n_events; i++) {
761 if (cpuc->event[i] == event)
762 break;
763 }
764 BUG_ON(i == cpuc->n_events);
765 return cpuc->current_idx[i];
766}
767
768static void sparc_pmu_read(struct perf_event *event) 797static void sparc_pmu_read(struct perf_event *event)
769{ 798{
770 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 799 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event)
774 sparc_perf_event_update(event, hwc, idx); 803 sparc_perf_event_update(event, hwc, idx);
775} 804}
776 805
777static void sparc_pmu_unthrottle(struct perf_event *event)
778{
779 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
780 int idx = active_event_index(cpuc, event);
781 struct hw_perf_event *hwc = &event->hw;
782
783 sparc_pmu_enable_event(cpuc, hwc, idx);
784}
785
786static atomic_t active_events = ATOMIC_INIT(0); 806static atomic_t active_events = ATOMIC_INIT(0);
787static DEFINE_MUTEX(pmc_grab_mutex); 807static DEFINE_MUTEX(pmc_grab_mutex);
788 808
@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts,
877 if (!n_ev) 897 if (!n_ev)
878 return 0; 898 return 0;
879 899
880 if (n_ev > perf_max_events) 900 if (n_ev > MAX_HWEVENTS)
881 return -1; 901 return -1;
882 902
883 msk0 = perf_event_get_msk(events[0]); 903 msk0 = perf_event_get_msk(events[0]);
@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count,
984 return n; 1004 return n;
985} 1005}
986 1006
987static int sparc_pmu_enable(struct perf_event *event) 1007static int sparc_pmu_add(struct perf_event *event, int ef_flags)
988{ 1008{
989 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1009 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
990 int n0, ret = -EAGAIN; 1010 int n0, ret = -EAGAIN;
991 unsigned long flags; 1011 unsigned long flags;
992 1012
993 local_irq_save(flags); 1013 local_irq_save(flags);
994 perf_disable(); 1014 perf_pmu_disable(event->pmu);
995 1015
996 n0 = cpuc->n_events; 1016 n0 = cpuc->n_events;
997 if (n0 >= perf_max_events) 1017 if (n0 >= MAX_HWEVENTS)
998 goto out; 1018 goto out;
999 1019
1000 cpuc->event[n0] = event; 1020 cpuc->event[n0] = event;
1001 cpuc->events[n0] = event->hw.event_base; 1021 cpuc->events[n0] = event->hw.event_base;
1002 cpuc->current_idx[n0] = PIC_NO_INDEX; 1022 cpuc->current_idx[n0] = PIC_NO_INDEX;
1003 1023
1024 event->hw.state = PERF_HES_UPTODATE;
1025 if (!(ef_flags & PERF_EF_START))
1026 event->hw.state |= PERF_HES_STOPPED;
1027
1004 /* 1028 /*
1005 * If group events scheduling transaction was started, 1029 * If group events scheduling transaction was started,
1006 * skip the schedulability test here, it will be peformed 1030 * skip the schedulability test here, it will be peformed
@@ -1020,12 +1044,12 @@ nocheck:
1020 1044
1021 ret = 0; 1045 ret = 0;
1022out: 1046out:
1023 perf_enable(); 1047 perf_pmu_enable(event->pmu);
1024 local_irq_restore(flags); 1048 local_irq_restore(flags);
1025 return ret; 1049 return ret;
1026} 1050}
1027 1051
1028static int __hw_perf_event_init(struct perf_event *event) 1052static int sparc_pmu_event_init(struct perf_event *event)
1029{ 1053{
1030 struct perf_event_attr *attr = &event->attr; 1054 struct perf_event_attr *attr = &event->attr;
1031 struct perf_event *evts[MAX_HWEVENTS]; 1055 struct perf_event *evts[MAX_HWEVENTS];
@@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event)
1038 if (atomic_read(&nmi_active) < 0) 1062 if (atomic_read(&nmi_active) < 0)
1039 return -ENODEV; 1063 return -ENODEV;
1040 1064
1041 pmap = NULL; 1065 switch (attr->type) {
1042 if (attr->type == PERF_TYPE_HARDWARE) { 1066 case PERF_TYPE_HARDWARE:
1043 if (attr->config >= sparc_pmu->max_events) 1067 if (attr->config >= sparc_pmu->max_events)
1044 return -EINVAL; 1068 return -EINVAL;
1045 pmap = sparc_pmu->event_map(attr->config); 1069 pmap = sparc_pmu->event_map(attr->config);
1046 } else if (attr->type == PERF_TYPE_HW_CACHE) { 1070 break;
1071
1072 case PERF_TYPE_HW_CACHE:
1047 pmap = sparc_map_cache_event(attr->config); 1073 pmap = sparc_map_cache_event(attr->config);
1048 if (IS_ERR(pmap)) 1074 if (IS_ERR(pmap))
1049 return PTR_ERR(pmap); 1075 return PTR_ERR(pmap);
1050 } else if (attr->type != PERF_TYPE_RAW) 1076 break;
1051 return -EOPNOTSUPP; 1077
1078 case PERF_TYPE_RAW:
1079 pmap = NULL;
1080 break;
1081
1082 default:
1083 return -ENOENT;
1084
1085 }
1052 1086
1053 if (pmap) { 1087 if (pmap) {
1054 hwc->event_base = perf_event_encode(pmap); 1088 hwc->event_base = perf_event_encode(pmap);
1055 } else { 1089 } else {
1056 /* User gives us "(encoding << 16) | pic_mask" for 1090 /*
1091 * User gives us "(encoding << 16) | pic_mask" for
1057 * PERF_TYPE_RAW events. 1092 * PERF_TYPE_RAW events.
1058 */ 1093 */
1059 hwc->event_base = attr->config; 1094 hwc->event_base = attr->config;
@@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1071 n = 0; 1106 n = 0;
1072 if (event->group_leader != event) { 1107 if (event->group_leader != event) {
1073 n = collect_events(event->group_leader, 1108 n = collect_events(event->group_leader,
1074 perf_max_events - 1, 1109 MAX_HWEVENTS - 1,
1075 evts, events, current_idx_dmy); 1110 evts, events, current_idx_dmy);
1076 if (n < 0) 1111 if (n < 0)
1077 return -EINVAL; 1112 return -EINVAL;
@@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event)
1107 * Set the flag to make pmu::enable() not perform the 1142 * Set the flag to make pmu::enable() not perform the
1108 * schedulability test, it will be performed at commit time 1143 * schedulability test, it will be performed at commit time
1109 */ 1144 */
1110static void sparc_pmu_start_txn(const struct pmu *pmu) 1145static void sparc_pmu_start_txn(struct pmu *pmu)
1111{ 1146{
1112 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1147 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1113 1148
1149 perf_pmu_disable(pmu);
1114 cpuhw->group_flag |= PERF_EVENT_TXN; 1150 cpuhw->group_flag |= PERF_EVENT_TXN;
1115} 1151}
1116 1152
@@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1119 * Clear the flag and pmu::enable() will perform the 1155 * Clear the flag and pmu::enable() will perform the
1120 * schedulability test. 1156 * schedulability test.
1121 */ 1157 */
1122static void sparc_pmu_cancel_txn(const struct pmu *pmu) 1158static void sparc_pmu_cancel_txn(struct pmu *pmu)
1123{ 1159{
1124 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1160 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1125 1161
1126 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1162 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1163 perf_pmu_enable(pmu);
1127} 1164}
1128 1165
1129/* 1166/*
@@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1131 * Perform the group schedulability test as a whole 1168 * Perform the group schedulability test as a whole
1132 * Return 0 if success 1169 * Return 0 if success
1133 */ 1170 */
1134static int sparc_pmu_commit_txn(const struct pmu *pmu) 1171static int sparc_pmu_commit_txn(struct pmu *pmu)
1135{ 1172{
1136 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1173 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1137 int n; 1174 int n;
@@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1147 return -EAGAIN; 1184 return -EAGAIN;
1148 1185
1149 cpuc->group_flag &= ~PERF_EVENT_TXN; 1186 cpuc->group_flag &= ~PERF_EVENT_TXN;
1187 perf_pmu_enable(pmu);
1150 return 0; 1188 return 0;
1151} 1189}
1152 1190
1153static const struct pmu pmu = { 1191static struct pmu pmu = {
1154 .enable = sparc_pmu_enable, 1192 .pmu_enable = sparc_pmu_enable,
1155 .disable = sparc_pmu_disable, 1193 .pmu_disable = sparc_pmu_disable,
1194 .event_init = sparc_pmu_event_init,
1195 .add = sparc_pmu_add,
1196 .del = sparc_pmu_del,
1197 .start = sparc_pmu_start,
1198 .stop = sparc_pmu_stop,
1156 .read = sparc_pmu_read, 1199 .read = sparc_pmu_read,
1157 .unthrottle = sparc_pmu_unthrottle,
1158 .start_txn = sparc_pmu_start_txn, 1200 .start_txn = sparc_pmu_start_txn,
1159 .cancel_txn = sparc_pmu_cancel_txn, 1201 .cancel_txn = sparc_pmu_cancel_txn,
1160 .commit_txn = sparc_pmu_commit_txn, 1202 .commit_txn = sparc_pmu_commit_txn,
1161}; 1203};
1162 1204
1163const struct pmu *hw_perf_event_init(struct perf_event *event)
1164{
1165 int err = __hw_perf_event_init(event);
1166
1167 if (err)
1168 return ERR_PTR(err);
1169 return &pmu;
1170}
1171
1172void perf_event_print_debug(void) 1205void perf_event_print_debug(void)
1173{ 1206{
1174 unsigned long flags; 1207 unsigned long flags;
@@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1244 continue; 1277 continue;
1245 1278
1246 if (perf_event_overflow(event, 1, &data, regs)) 1279 if (perf_event_overflow(event, 1, &data, regs))
1247 sparc_pmu_disable_event(cpuc, hwc, idx); 1280 sparc_pmu_stop(event, 0);
1248 } 1281 }
1249 1282
1250 return NOTIFY_STOP; 1283 return NOTIFY_STOP;
@@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void)
1285 1318
1286 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); 1319 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
1287 1320
1288 /* All sparc64 PMUs currently have 2 events. */ 1321 perf_pmu_register(&pmu);
1289 perf_max_events = 2;
1290
1291 register_die_notifier(&perf_event_nmi_notifier); 1322 register_die_notifier(&perf_event_nmi_notifier);
1292} 1323}
1293 1324
1294static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) 1325void perf_callchain_kernel(struct perf_callchain_entry *entry,
1295{ 1326 struct pt_regs *regs)
1296 if (entry->nr < PERF_MAX_STACK_DEPTH)
1297 entry->ip[entry->nr++] = ip;
1298}
1299
1300static void perf_callchain_kernel(struct pt_regs *regs,
1301 struct perf_callchain_entry *entry)
1302{ 1327{
1303 unsigned long ksp, fp; 1328 unsigned long ksp, fp;
1304#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1329#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1305 int graph = 0; 1330 int graph = 0;
1306#endif 1331#endif
1307 1332
1308 callchain_store(entry, PERF_CONTEXT_KERNEL); 1333 stack_trace_flush();
1309 callchain_store(entry, regs->tpc); 1334
1335 perf_callchain_store(entry, regs->tpc);
1310 1336
1311 ksp = regs->u_regs[UREG_I6]; 1337 ksp = regs->u_regs[UREG_I6];
1312 fp = ksp + STACK_BIAS; 1338 fp = ksp + STACK_BIAS;
@@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1330 pc = sf->callers_pc; 1356 pc = sf->callers_pc;
1331 fp = (unsigned long)sf->fp + STACK_BIAS; 1357 fp = (unsigned long)sf->fp + STACK_BIAS;
1332 } 1358 }
1333 callchain_store(entry, pc); 1359 perf_callchain_store(entry, pc);
1334#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1360#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1335 if ((pc + 8UL) == (unsigned long) &return_to_handler) { 1361 if ((pc + 8UL) == (unsigned long) &return_to_handler) {
1336 int index = current->curr_ret_stack; 1362 int index = current->curr_ret_stack;
1337 if (current->ret_stack && index >= graph) { 1363 if (current->ret_stack && index >= graph) {
1338 pc = current->ret_stack[index - graph].ret; 1364 pc = current->ret_stack[index - graph].ret;
1339 callchain_store(entry, pc); 1365 perf_callchain_store(entry, pc);
1340 graph++; 1366 graph++;
1341 } 1367 }
1342 } 1368 }
@@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1344 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1370 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1345} 1371}
1346 1372
1347static void perf_callchain_user_64(struct pt_regs *regs, 1373static void perf_callchain_user_64(struct perf_callchain_entry *entry,
1348 struct perf_callchain_entry *entry) 1374 struct pt_regs *regs)
1349{ 1375{
1350 unsigned long ufp; 1376 unsigned long ufp;
1351 1377
1352 callchain_store(entry, PERF_CONTEXT_USER); 1378 perf_callchain_store(entry, regs->tpc);
1353 callchain_store(entry, regs->tpc);
1354 1379
1355 ufp = regs->u_regs[UREG_I6] + STACK_BIAS; 1380 ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
1356 do { 1381 do {
@@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs,
1363 1388
1364 pc = sf.callers_pc; 1389 pc = sf.callers_pc;
1365 ufp = (unsigned long)sf.fp + STACK_BIAS; 1390 ufp = (unsigned long)sf.fp + STACK_BIAS;
1366 callchain_store(entry, pc); 1391 perf_callchain_store(entry, pc);
1367 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1392 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1368} 1393}
1369 1394
1370static void perf_callchain_user_32(struct pt_regs *regs, 1395static void perf_callchain_user_32(struct perf_callchain_entry *entry,
1371 struct perf_callchain_entry *entry) 1396 struct pt_regs *regs)
1372{ 1397{
1373 unsigned long ufp; 1398 unsigned long ufp;
1374 1399
1375 callchain_store(entry, PERF_CONTEXT_USER); 1400 perf_callchain_store(entry, regs->tpc);
1376 callchain_store(entry, regs->tpc);
1377 1401
1378 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; 1402 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
1379 do { 1403 do {
@@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs,
1386 1410
1387 pc = sf.callers_pc; 1411 pc = sf.callers_pc;
1388 ufp = (unsigned long)sf.fp; 1412 ufp = (unsigned long)sf.fp;
1389 callchain_store(entry, pc); 1413 perf_callchain_store(entry, pc);
1390 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1414 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1391} 1415}
1392 1416
1393/* Like powerpc we can't get PMU interrupts within the PMU handler, 1417void
1394 * so no need for separate NMI and IRQ chains as on x86. 1418perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1395 */
1396static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
1397
1398struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1399{ 1419{
1400 struct perf_callchain_entry *entry = &__get_cpu_var(callchain); 1420 flushw_user();
1401 1421 if (test_thread_flag(TIF_32BIT))
1402 entry->nr = 0; 1422 perf_callchain_user_32(entry, regs);
1403 if (!user_mode(regs)) { 1423 else
1404 stack_trace_flush(); 1424 perf_callchain_user_64(entry, regs);
1405 perf_callchain_kernel(regs, entry);
1406 if (current->mm)
1407 regs = task_pt_regs(current);
1408 else
1409 regs = NULL;
1410 }
1411 if (regs) {
1412 flushw_user();
1413 if (test_thread_flag(TIF_32BIT))
1414 perf_callchain_user_32(regs, entry);
1415 else
1416 perf_callchain_user_64(regs, entry);
1417 }
1418 return entry;
1419} 1425}
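Taken together, the perf_event.c hunks convert the sparc PMU from the old global hw_perf_enable()/hw_perf_disable() and per-event enable/disable model to the struct pmu interface: pmu_enable()/pmu_disable() gate the whole PMU, add()/del() schedule an event on or off a counter, and start()/stop() suspend counting while tracking it with the PERF_HES_STOPPED and PERF_HES_UPTODATE flags. Note also that event_init() now returns -ENOENT rather than -EOPNOTSUPP for event types it does not own, so the core can offer the event to other registered PMUs. A hedged sketch of the add()/stop() contract the core now expects (illustrative, not sparc-specific):

	static int example_pmu_add(struct perf_event *event, int flags)
	{
		event->hw.state = PERF_HES_UPTODATE;
		if (!(flags & PERF_EF_START))
			event->hw.state |= PERF_HES_STOPPED;	/* scheduled, but not counting yet */

		/* ... claim a free counter for the event ... */
		return 0;
	}

	static void example_pmu_stop(struct perf_event *event, int flags)
	{
		if (!(event->hw.state & PERF_HES_STOPPED)) {
			/* ... disable the hardware counter ... */
			event->hw.state |= PERF_HES_STOPPED;
		}
		if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
			/* ... fold the hardware count into the event ... */
			event->hw.state |= PERF_HES_UPTODATE;
		}
	}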
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 596c60086930..9a27d563fc30 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -208,7 +208,7 @@ static void tile_irq_chip_eoi(unsigned int irq)
208} 208}
209 209
210static struct irq_chip tile_irq_chip = { 210static struct irq_chip tile_irq_chip = {
211 .typename = "tile_irq_chip", 211 .name = "tile_irq_chip",
212 .ack = tile_irq_chip_ack, 212 .ack = tile_irq_chip_ack,
213 .eoi = tile_irq_chip_eoi, 213 .eoi = tile_irq_chip_eoi,
214 .mask = tile_irq_chip_mask, 214 .mask = tile_irq_chip_mask,
@@ -288,7 +288,7 @@ int show_interrupts(struct seq_file *p, void *v)
288 for_each_online_cpu(j) 288 for_each_online_cpu(j)
289 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 289 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
290#endif 290#endif
291 seq_printf(p, " %14s", irq_desc[i].chip->typename); 291 seq_printf(p, " %14s", irq_desc[i].chip->name);
292 seq_printf(p, " %s", action->name); 292 seq_printf(p, " %s", action->name);
293 293
294 for (action = action->next; action; action = action->next) 294 for (action = action->next; action; action = action->next)
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 0c46e398cd8f..63c740a85b4c 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -40,6 +40,11 @@ static char *mixer = HOSTAUDIO_DEV_MIXER;
40" This is used to specify the host mixer device to the hostaudio driver.\n"\ 40" This is used to specify the host mixer device to the hostaudio driver.\n"\
41" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" 41" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n"
42 42
43module_param(dsp, charp, 0644);
44MODULE_PARM_DESC(dsp, DSP_HELP);
45module_param(mixer, charp, 0644);
46MODULE_PARM_DESC(mixer, MIXER_HELP);
47
43#ifndef MODULE 48#ifndef MODULE
44static int set_dsp(char *name, int *add) 49static int set_dsp(char *name, int *add)
45{ 50{
@@ -56,15 +61,6 @@ static int set_mixer(char *name, int *add)
56} 61}
57 62
58__uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP); 63__uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP);
59
60#else /*MODULE*/
61
62module_param(dsp, charp, 0644);
63MODULE_PARM_DESC(dsp, DSP_HELP);
64
65module_param(mixer, charp, 0644);
66MODULE_PARM_DESC(mixer, MIXER_HELP);
67
68#endif 64#endif
69 65
70/* /dev/dsp file operations */ 66/* /dev/dsp file operations */
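Moving the module_param()/MODULE_PARM_DESC() lines out of the #else /*MODULE*/ branch means the dsp and mixer parameters are registered in the built-in case as well (visible under /sys/module/.../parameters and presumably settable as hostaudio.dsp= on the kernel command line), while the __uml_setup() handlers above keep providing the UML-style "dsp=" and "mixer=" switches for non-modular builds.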
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 1bcd208c459f..9734994cba1e 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -163,6 +163,7 @@ struct ubd {
163 struct scatterlist sg[MAX_SG]; 163 struct scatterlist sg[MAX_SG];
164 struct request *request; 164 struct request *request;
165 int start_sg, end_sg; 165 int start_sg, end_sg;
166 sector_t rq_pos;
166}; 167};
167 168
168#define DEFAULT_COW { \ 169#define DEFAULT_COW { \
@@ -187,6 +188,7 @@ struct ubd {
187 .request = NULL, \ 188 .request = NULL, \
188 .start_sg = 0, \ 189 .start_sg = 0, \
189 .end_sg = 0, \ 190 .end_sg = 0, \
191 .rq_pos = 0, \
190} 192}
191 193
192/* Protected by ubd_lock */ 194/* Protected by ubd_lock */
@@ -1228,7 +1230,6 @@ static void do_ubd_request(struct request_queue *q)
1228{ 1230{
1229 struct io_thread_req *io_req; 1231 struct io_thread_req *io_req;
1230 struct request *req; 1232 struct request *req;
1231 sector_t sector;
1232 int n; 1233 int n;
1233 1234
1234 while(1){ 1235 while(1){
@@ -1239,12 +1240,12 @@ static void do_ubd_request(struct request_queue *q)
1239 return; 1240 return;
1240 1241
1241 dev->request = req; 1242 dev->request = req;
1243 dev->rq_pos = blk_rq_pos(req);
1242 dev->start_sg = 0; 1244 dev->start_sg = 0;
1243 dev->end_sg = blk_rq_map_sg(q, req, dev->sg); 1245 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1244 } 1246 }
1245 1247
1246 req = dev->request; 1248 req = dev->request;
1247 sector = blk_rq_pos(req);
1248 while(dev->start_sg < dev->end_sg){ 1249 while(dev->start_sg < dev->end_sg){
1249 struct scatterlist *sg = &dev->sg[dev->start_sg]; 1250 struct scatterlist *sg = &dev->sg[dev->start_sg];
1250 1251
@@ -1256,10 +1257,9 @@ static void do_ubd_request(struct request_queue *q)
1256 return; 1257 return;
1257 } 1258 }
1258 prepare_request(req, io_req, 1259 prepare_request(req, io_req,
1259 (unsigned long long)sector << 9, 1260 (unsigned long long)dev->rq_pos << 9,
1260 sg->offset, sg->length, sg_page(sg)); 1261 sg->offset, sg->length, sg_page(sg));
1261 1262
1262 sector += sg->length >> 9;
1263 n = os_write_file(thread_fd, &io_req, 1263 n = os_write_file(thread_fd, &io_req,
1264 sizeof(struct io_thread_req *)); 1264 sizeof(struct io_thread_req *));
1265 if(n != sizeof(struct io_thread_req *)){ 1265 if(n != sizeof(struct io_thread_req *)){
@@ -1272,6 +1272,7 @@ static void do_ubd_request(struct request_queue *q)
1272 return; 1272 return;
1273 } 1273 }
1274 1274
1275 dev->rq_pos += sg->length >> 9;
1275 dev->start_sg++; 1276 dev->start_sg++;
1276 } 1277 }
1277 dev->end_sg = 0; 1278 dev->end_sg = 0;
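The ubd change fixes resumption of partially submitted requests: do_ubd_request() returns early when writing to the I/O thread pipe would block, and on re-entry the old code recomputed the local sector from blk_rq_pos(req) even though start_sg had already advanced past the scatterlist entries submitted earlier, so the remaining entries were issued at stale offsets. Keeping rq_pos in struct ubd and advancing it only after each entry is handed off keeps the file offset and the scatterlist index in step across calls.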
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index a3f0b04d7101..a746e3037a5b 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -46,7 +46,7 @@ int show_interrupts(struct seq_file *p, void *v)
46 for_each_online_cpu(j) 46 for_each_online_cpu(j)
47 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 47 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
48#endif 48#endif
49 seq_printf(p, " %14s", irq_desc[i].chip->typename); 49 seq_printf(p, " %14s", irq_desc[i].chip->name);
50 seq_printf(p, " %s", action->name); 50 seq_printf(p, " %s", action->name);
51 51
52 for (action=action->next; action; action = action->next) 52 for (action=action->next; action; action = action->next)
@@ -369,7 +369,7 @@ static void dummy(unsigned int irq)
369 369
370/* This is used for everything else than the timer. */ 370/* This is used for everything else than the timer. */
371static struct irq_chip normal_irq_type = { 371static struct irq_chip normal_irq_type = {
372 .typename = "SIGIO", 372 .name = "SIGIO",
373 .release = free_irq_by_irq_and_dev, 373 .release = free_irq_by_irq_and_dev,
374 .disable = dummy, 374 .disable = dummy,
375 .enable = dummy, 375 .enable = dummy,
@@ -378,7 +378,7 @@ static struct irq_chip normal_irq_type = {
378}; 378};
379 379
380static struct irq_chip SIGVTALRM_irq_type = { 380static struct irq_chip SIGVTALRM_irq_type = {
381 .typename = "SIGVTALRM", 381 .name = "SIGVTALRM",
382 .release = free_irq_by_irq_and_dev, 382 .release = free_irq_by_irq_and_dev,
383 .shutdown = dummy, /* never called */ 383 .shutdown = dummy, /* never called */
384 .disable = dummy, 384 .disable = dummy,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cea0cd9a316f..7ab9db88ab6a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,6 +25,7 @@ config X86
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_EVENTS if (!M386 && !M486) 27 select HAVE_PERF_EVENTS if (!M386 && !M486)
28 select HAVE_IRQ_WORK
28 select HAVE_IOREMAP_PROT 29 select HAVE_IOREMAP_PROT
29 select HAVE_KPROBES 30 select HAVE_KPROBES
30 select ARCH_WANT_OPTIONAL_GPIOLIB 31 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -33,6 +34,7 @@ config X86
33 select HAVE_KRETPROBES 34 select HAVE_KRETPROBES
34 select HAVE_OPTPROBES 35 select HAVE_OPTPROBES
35 select HAVE_FTRACE_MCOUNT_RECORD 36 select HAVE_FTRACE_MCOUNT_RECORD
37 select HAVE_C_RECORDMCOUNT
36 select HAVE_DYNAMIC_FTRACE 38 select HAVE_DYNAMIC_FTRACE
37 select HAVE_FUNCTION_TRACER 39 select HAVE_FUNCTION_TRACER
38 select HAVE_FUNCTION_GRAPH_TRACER 40 select HAVE_FUNCTION_GRAPH_TRACER
@@ -59,6 +61,12 @@ config X86
59 select ANON_INODES 61 select ANON_INODES
60 select HAVE_ARCH_KMEMCHECK 62 select HAVE_ARCH_KMEMCHECK
61 select HAVE_USER_RETURN_NOTIFIER 63 select HAVE_USER_RETURN_NOTIFIER
64 select HAVE_ARCH_JUMP_LABEL
65 select HAVE_TEXT_POKE_SMP
66 select HAVE_GENERIC_HARDIRQS
67 select HAVE_SPARSE_IRQ
68 select GENERIC_IRQ_PROBE
69 select GENERIC_PENDING_IRQ if SMP
62 70
63config INSTRUCTION_DECODER 71config INSTRUCTION_DECODER
64 def_bool (KPROBES || PERF_EVENTS) 72 def_bool (KPROBES || PERF_EVENTS)
@@ -200,20 +208,6 @@ config HAVE_INTEL_TXT
200 def_bool y 208 def_bool y
201 depends on EXPERIMENTAL && DMAR && ACPI 209 depends on EXPERIMENTAL && DMAR && ACPI
202 210
203# Use the generic interrupt handling code in kernel/irq/:
204config GENERIC_HARDIRQS
205 def_bool y
206
207config GENERIC_HARDIRQS_NO__DO_IRQ
208 def_bool y
209
210config GENERIC_IRQ_PROBE
211 def_bool y
212
213config GENERIC_PENDING_IRQ
214 def_bool y
215 depends on GENERIC_HARDIRQS && SMP
216
217config USE_GENERIC_SMP_HELPERS 211config USE_GENERIC_SMP_HELPERS
218 def_bool y 212 def_bool y
219 depends on SMP 213 depends on SMP
@@ -296,23 +290,6 @@ config X86_X2APIC
296 290
297 If you don't know what to do here, say N. 291 If you don't know what to do here, say N.
298 292
299config SPARSE_IRQ
300 bool "Support sparse irq numbering"
301 depends on PCI_MSI || HT_IRQ
302 ---help---
303 This enables support for sparse irqs. This is useful for distro
304 kernels that want to define a high CONFIG_NR_CPUS value but still
305 want to have low kernel memory footprint on smaller machines.
306
307 ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
308 out the irq_desc[] array in a more NUMA-friendly way. )
309
310 If you don't know what to do here, say N.
311
312config NUMA_IRQ_DESC
313 def_bool y
314 depends on SPARSE_IRQ && NUMA
315
316config X86_MPPARSE 293config X86_MPPARSE
317 bool "Enable MPS table" if ACPI 294 bool "Enable MPS table" if ACPI
318 default y 295 default y
@@ -517,25 +494,6 @@ if PARAVIRT_GUEST
517 494
518source "arch/x86/xen/Kconfig" 495source "arch/x86/xen/Kconfig"
519 496
520config VMI
521 bool "VMI Guest support (DEPRECATED)"
522 select PARAVIRT
523 depends on X86_32
524 ---help---
525 VMI provides a paravirtualized interface to the VMware ESX server
526 (it could be used by other hypervisors in theory too, but is not
527 at the moment), by linking the kernel to a GPL-ed ROM module
528 provided by the hypervisor.
529
530 As of September 2009, VMware has started a phased retirement
531 of this feature from VMware's products. Please see
532 feature-removal-schedule.txt for details. If you are
533 planning to enable this option, please note that you cannot
534 live migrate a VMI enabled VM to a future VMware product,
535 which doesn't support VMI. So if you expect your kernel to
536 seamlessly migrate to newer VMware products, keep this
537 disabled.
538
539config KVM_CLOCK 497config KVM_CLOCK
540 bool "KVM paravirtualized clock" 498 bool "KVM paravirtualized clock"
541 select PARAVIRT 499 select PARAVIRT
@@ -670,7 +628,7 @@ config GART_IOMMU
670 bool "GART IOMMU support" if EMBEDDED 628 bool "GART IOMMU support" if EMBEDDED
671 default y 629 default y
672 select SWIOTLB 630 select SWIOTLB
673 depends on X86_64 && PCI && K8_NB 631 depends on X86_64 && PCI && AMD_NB
674 ---help--- 632 ---help---
675 Support for full DMA access of devices with 32bit memory access only 633 Support for full DMA access of devices with 32bit memory access only
676 on systems with more than 3GB. This is usually needed for USB, 634 on systems with more than 3GB. This is usually needed for USB,
@@ -795,6 +753,17 @@ config SCHED_MC
795 making when dealing with multi-core CPU chips at a cost of slightly 753 making when dealing with multi-core CPU chips at a cost of slightly
796 increased overhead in some places. If unsure say N here. 754 increased overhead in some places. If unsure say N here.
797 755
756config IRQ_TIME_ACCOUNTING
757 bool "Fine granularity task level IRQ time accounting"
758 default n
759 ---help---
760 Select this option to enable fine granularity task irq time
761 accounting. This is done by reading a timestamp on each
762 transitions between softirq and hardirq state, so there can be a
763 small performance impact.
764
765 If in doubt, say N here.
766
798source "kernel/Kconfig.preempt" 767source "kernel/Kconfig.preempt"
799 768
800config X86_UP_APIC 769config X86_UP_APIC
@@ -1148,6 +1117,9 @@ config X86_PAE
1148config ARCH_PHYS_ADDR_T_64BIT 1117config ARCH_PHYS_ADDR_T_64BIT
1149 def_bool X86_64 || X86_PAE 1118 def_bool X86_64 || X86_PAE
1150 1119
1120config ARCH_DMA_ADDR_T_64BIT
1121 def_bool X86_64 || HIGHMEM64G
1122
1151config DIRECT_GBPAGES 1123config DIRECT_GBPAGES
1152 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED 1124 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
1153 default y 1125 default y
@@ -1326,25 +1298,34 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
1326 Set whether the default state of memory_corruption_check is 1298 Set whether the default state of memory_corruption_check is
1327 on or off. 1299 on or off.
1328 1300
1329config X86_RESERVE_LOW_64K 1301config X86_RESERVE_LOW
1330 bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" 1302 int "Amount of low memory, in kilobytes, to reserve for the BIOS"
1331 default y 1303 default 64
1304 range 4 640
1332 ---help--- 1305 ---help---
1333 Reserve the first 64K of physical RAM on BIOSes that are known 1306 Specify the amount of low memory to reserve for the BIOS.
1334 to potentially corrupt that memory range. A numbers of BIOSes are 1307
1335 known to utilize this area during suspend/resume, so it must not 1308 The first page contains BIOS data structures that the kernel
1336 be used by the kernel. 1309 must not use, so that page must always be reserved.
1337 1310
1338 Set this to N if you are absolutely sure that you trust the BIOS 1311 By default we reserve the first 64K of physical RAM, as a
1339 to get all its memory reservations and usages right. 1312 number of BIOSes are known to corrupt that memory range
1313 during events such as suspend/resume or monitor cable
1314 insertion, so it must not be used by the kernel.
1340 1315
1341 If you have doubts about the BIOS (e.g. suspend/resume does not 1316 You can set this to 4 if you are absolutely sure that you
1342 work or there's kernel crashes after certain hardware hotplug 1317 trust the BIOS to get all its memory reservations and usages
1343 events) and it's not AMI or Phoenix, then you might want to enable 1318 right. If you know your BIOS have problems beyond the
1344 X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical 1319 default 64K area, you can set this to 640 to avoid using the
1345 corruption patterns. 1320 entire low memory range.
1346 1321
1347 Say Y if unsure. 1322 If you have doubts about the BIOS (e.g. suspend/resume does
1323 not work or there's kernel crashes after certain hardware
1324 hotplug events) then you might want to enable
1325 X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
1326 typical corruption patterns.
1327
1328 Leave this to the default value of 64 if you are unsure.
1348 1329
1349config MATH_EMULATION 1330config MATH_EMULATION
1350 bool 1331 bool
@@ -1900,7 +1881,7 @@ config PCI_GODIRECT
1900 bool "Direct" 1881 bool "Direct"
1901 1882
1902config PCI_GOOLPC 1883config PCI_GOOLPC
1903 bool "OLPC" 1884 bool "OLPC XO-1"
1904 depends on OLPC 1885 depends on OLPC
1905 1886
1906config PCI_GOANY 1887config PCI_GOANY
@@ -2061,14 +2042,21 @@ config SCx200HR_TIMER
2061config OLPC 2042config OLPC
2062 bool "One Laptop Per Child support" 2043 bool "One Laptop Per Child support"
2063 select GPIOLIB 2044 select GPIOLIB
2045 select OLPC_OPENFIRMWARE
2064 ---help--- 2046 ---help---
2065 Add support for detecting the unique features of the OLPC 2047 Add support for detecting the unique features of the OLPC
2066 XO hardware. 2048 XO hardware.
2067 2049
2050config OLPC_XO1
2051 tristate "OLPC XO-1 support"
2052 depends on OLPC && PCI
2053 ---help---
2054 Add support for non-essential features of the OLPC XO-1 laptop.
2055
2068config OLPC_OPENFIRMWARE 2056config OLPC_OPENFIRMWARE
2069 bool "Support for OLPC's Open Firmware" 2057 bool "Support for OLPC's Open Firmware"
2070 depends on !X86_64 && !X86_PAE 2058 depends on !X86_64 && !X86_PAE
2071 default y if OLPC 2059 default n
2072 help 2060 help
2073 This option adds support for the implementation of Open Firmware 2061 This option adds support for the implementation of Open Firmware
2074 that is used on the OLPC XO-1 Children's Machine. 2062 that is used on the OLPC XO-1 Children's Machine.
@@ -2076,7 +2064,7 @@ config OLPC_OPENFIRMWARE
2076 2064
2077endif # X86_32 2065endif # X86_32
2078 2066
2079config K8_NB 2067config AMD_NB
2080 def_bool y 2068 def_bool y
2081 depends on CPU_SUP_AMD && PCI 2069 depends on CPU_SUP_AMD && PCI
2082 2070
@@ -2125,6 +2113,10 @@ config HAVE_ATOMIC_IOMAP
2125 def_bool y 2113 def_bool y
2126 depends on X86_32 2114 depends on X86_32
2127 2115
2116config HAVE_TEXT_POKE_SMP
2117 bool
2118 select STOP_MACHINE if SMP
2119
2128source "net/Kconfig" 2120source "net/Kconfig"
2129 2121
2130source "drivers/Kconfig" 2122source "drivers/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 75085080b63e..e5bb96b10f1a 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,6 +43,10 @@ config EARLY_PRINTK
43 with klogd/syslogd or the X server. You should normally N here, 43 with klogd/syslogd or the X server. You should normally N here,
44 unless you want to debug such a crash. 44 unless you want to debug such a crash.
45 45
46config EARLY_PRINTK_MRST
47 bool "Early printk for MRST platform support"
48 depends on EARLY_PRINTK && X86_MRST
49
46config EARLY_PRINTK_DBGP 50config EARLY_PRINTK_DBGP
47 bool "Early printk via EHCI debug port" 51 bool "Early printk via EHCI debug port"
48 depends on EARLY_PRINTK && PCI 52 depends on EARLY_PRINTK && PCI
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e8c8881351b3..b02e509072a7 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -96,8 +96,12 @@ cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_en
96# is .cfi_signal_frame supported too? 96# is .cfi_signal_frame supported too?
97cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) 97cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
98cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) 98cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
99KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) 99
100KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) 100# does binutils support specific instructions?
101asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
102
103KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
104KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
101 105
102LDFLAGS := -m elf_$(UTS_MACHINE) 106LDFLAGS := -m elf_$(UTS_MACHINE)
103 107
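The Makefile hunk probes the assembler with as-instr and defines CONFIG_AS_FXSAVEQ for both C and assembly builds when "fxsaveq (%rax)" is accepted. A hedged sketch of how such a define is typically consumed, picking the mnemonic when available and falling back to an explicitly REX-prefixed fxsave otherwise (illustrative only):

	static inline void fxsave(struct i387_fxsave_struct *fx)
	{
	#ifdef CONFIG_AS_FXSAVEQ
		asm volatile("fxsaveq %0" : "=m" (*fx));
	#else
		asm volatile("rex64/fxsave (%[fx])"
			     : "=m" (*fx) : [fx] "R" (fx));
	#endif
	}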
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 0350311906ae..2d93bdbc9ac0 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -34,7 +34,7 @@
34#include <asm/ia32.h> 34#include <asm/ia32.h>
35 35
36#undef WARN_OLD 36#undef WARN_OLD
37#undef CORE_DUMP /* probably broken */ 37#undef CORE_DUMP /* definitely broken */
38 38
39static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); 39static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
40static int load_aout_library(struct file *); 40static int load_aout_library(struct file *);
@@ -131,21 +131,15 @@ static void set_brk(unsigned long start, unsigned long end)
131 * macros to write out all the necessary info. 131 * macros to write out all the necessary info.
132 */ 132 */
133 133
134static int dump_write(struct file *file, const void *addr, int nr) 134#include <linux/coredump.h>
135{
136 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
137}
138 135
139#define DUMP_WRITE(addr, nr) \ 136#define DUMP_WRITE(addr, nr) \
140 if (!dump_write(file, (void *)(addr), (nr))) \ 137 if (!dump_write(file, (void *)(addr), (nr))) \
141 goto end_coredump; 138 goto end_coredump;
142 139
143#define DUMP_SEEK(offset) \ 140#define DUMP_SEEK(offset) \
144 if (file->f_op->llseek) { \ 141 if (!dump_seek(file, offset)) \
145 if (file->f_op->llseek(file, (offset), 0) != (offset)) \ 142 goto end_coredump;
146 goto end_coredump; \
147 } else \
148 file->f_pos = (offset)
149 143
150#define START_DATA() (u.u_tsize << PAGE_SHIFT) 144#define START_DATA() (u.u_tsize << PAGE_SHIFT)
151#define START_STACK(u) (u.start_stack) 145#define START_STACK(u) (u.start_stack)
@@ -217,12 +211,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
217 dump_size = dump.u_ssize << PAGE_SHIFT; 211 dump_size = dump.u_ssize << PAGE_SHIFT;
218 DUMP_WRITE(dump_start, dump_size); 212 DUMP_WRITE(dump_start, dump_size);
219 } 213 }
220 /*
221 * Finally dump the task struct. Not be used by gdb, but
222 * could be useful
223 */
224 set_fs(KERNEL_DS);
225 DUMP_WRITE(current, sizeof(*current));
226end_coredump: 214end_coredump:
227 set_fs(fs); 215 set_fs(fs);
228 return has_dumped; 216 return has_dumped;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bc6abb7bc7ee..76561d20ea2f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/jump_label.h>
7#include <asm/asm.h> 8#include <asm/asm.h>
8 9
9/* 10/*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
160#define __parainstructions_end NULL 161#define __parainstructions_end NULL
161#endif 162#endif
162 163
164extern void *text_poke_early(void *addr, const void *opcode, size_t len);
165
163/* 166/*
164 * Clear and restore the kernel write-protection flag on the local CPU. 167 * Clear and restore the kernel write-protection flag on the local CPU.
165 * Allows the kernel to edit read-only pages. 168 * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
180extern void *text_poke(void *addr, const void *opcode, size_t len); 183extern void *text_poke(void *addr, const void *opcode, size_t len);
181extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 184extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
182 185
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5
188extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
189extern void arch_init_ideal_nop5(void);
190#else
191static inline void arch_init_ideal_nop5(void) {}
192#endif
193
183#endif /* _ASM_X86_ALTERNATIVE_H */ 194#endif /* _ASM_X86_ALTERNATIVE_H */
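alternative.h now exports text_poke_early() and, when dynamic ftrace or jump labels are configured, the ideal_nop5[] buffer that arch_init_ideal_nop5() fills at boot with the best 5-byte NOP for the running CPU, so ftrace and the jump-label code can share a single nop choice. A hedged sketch of the disable side of such a patch site (illustrative; the real consumers are the x86 jump-label and ftrace code):

	static void disable_patch_site(void *addr)
	{
		/* overwrite the 5-byte jump with the boot-selected 5-byte NOP */
		text_poke_smp(addr, ideal_nop5, IDEAL_NOP_SIZE_5);
	}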
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 5af2982133b5..a6863a2dec1f 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -24,11 +24,11 @@
24 24
25#ifdef CONFIG_AMD_IOMMU 25#ifdef CONFIG_AMD_IOMMU
26 26
27extern void amd_iommu_detect(void); 27extern int amd_iommu_detect(void);
28 28
29#else 29#else
30 30
31static inline void amd_iommu_detect(void) { } 31static inline int amd_iommu_detect(void) { return -ENODEV; }
32 32
33#endif 33#endif
34 34
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index cb030374b90a..916bc8111a01 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify it 5 * This program is free software; you can redistribute it and/or modify it
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 08616180deaf..e3509fc303bf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -416,13 +416,22 @@ struct amd_iommu {
416 struct dma_ops_domain *default_dom; 416 struct dma_ops_domain *default_dom;
417 417
418 /* 418 /*
419 * This array is required to work around a potential BIOS bug. 419 * We can't rely on the BIOS to restore all values on reinit, so we
420 * The BIOS may miss to restore parts of the PCI configuration 420 * need to stash them
421 * space when the system resumes from S3. The result is that the
422 * IOMMU does not execute commands anymore which leads to system
423 * failure.
424 */ 421 */
425 u32 cache_cfg[4]; 422
423 /* The iommu BAR */
424 u32 stored_addr_lo;
425 u32 stored_addr_hi;
426
427 /*
428 * Each iommu has 6 l1s, each of which is documented as having 0x12
429 * registers
430 */
431 u32 stored_l1[6][0x12];
432
433 /* The l2 indirect registers */
434 u32 stored_l2[0x83];
426}; 435};
427 436
428/* 437/*
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/amd_nb.h
index af00bd1d2089..c8517f81b21e 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -1,5 +1,5 @@
1#ifndef _ASM_X86_K8_H 1#ifndef _ASM_X86_AMD_NB_H
2#define _ASM_X86_K8_H 2#define _ASM_X86_AMD_NB_H
3 3
4#include <linux/pci.h> 4#include <linux/pci.h>
5 5
@@ -7,24 +7,27 @@ extern struct pci_device_id k8_nb_ids[];
7struct bootnode; 7struct bootnode;
8 8
9extern int early_is_k8_nb(u32 value); 9extern int early_is_k8_nb(u32 value);
10extern struct pci_dev **k8_northbridges;
11extern int num_k8_northbridges;
12extern int cache_k8_northbridges(void); 10extern int cache_k8_northbridges(void);
13extern void k8_flush_garts(void); 11extern void k8_flush_garts(void);
14extern int k8_get_nodes(struct bootnode *nodes); 12extern int k8_get_nodes(struct bootnode *nodes);
15extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); 13extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
16extern int k8_scan_nodes(void); 14extern int k8_scan_nodes(void);
17 15
18#ifdef CONFIG_K8_NB 16struct k8_northbridge_info {
19extern int num_k8_northbridges; 17 u16 num;
18 u8 gart_supported;
19 struct pci_dev **nb_misc;
20};
21extern struct k8_northbridge_info k8_northbridges;
22
23#ifdef CONFIG_AMD_NB
20 24
21static inline struct pci_dev *node_to_k8_nb_misc(int node) 25static inline struct pci_dev *node_to_k8_nb_misc(int node)
22{ 26{
23 return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; 27 return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
24} 28}
25 29
26#else 30#else
27#define num_k8_northbridges 0
28 31
29static inline struct pci_dev *node_to_k8_nb_misc(int node) 32static inline struct pci_dev *node_to_k8_nb_misc(int node)
30{ 33{
@@ -33,4 +36,4 @@ static inline struct pci_dev *node_to_k8_nb_misc(int node)
33#endif 36#endif
34 37
35 38
36#endif /* _ASM_X86_K8_H */ 39#endif /* _ASM_X86_AMD_NB_H */
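The k8.h -> amd_nb.h rename also replaces the bare k8_northbridges[] array and num_k8_northbridges counter with a k8_northbridge_info struct carrying the count, a gart_supported flag and the nb_misc device array. A hedged sketch of the resulting iteration pattern (illustrative):

	static void touch_nb_misc_devices(void)
	{
		int i;

		for (i = 0; i < k8_northbridges.num; i++) {
			struct pci_dev *misc = k8_northbridges.nb_misc[i];

			if (!misc)
				continue;
			/* program the misc PCI function; GART-specific work should
			 * additionally check k8_northbridges.gart_supported */
		}
	}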
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index a69b1ac9eaf8..2fefa501d3ba 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -54,7 +54,6 @@ extern struct clock_event_device *global_clock_event;
54extern unsigned long apbt_quick_calibrate(void); 54extern unsigned long apbt_quick_calibrate(void);
55extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); 55extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
56extern void apbt_setup_secondary_clock(void); 56extern void apbt_setup_secondary_clock(void);
57extern unsigned int boot_cpu_id;
58 57
59extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); 58extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
60extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); 59extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1fa03e04ae44..286de34b0ed6 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -252,9 +252,7 @@ static inline int apic_is_clustered_box(void)
252} 252}
253#endif 253#endif
254 254
255extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); 255extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
256extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
257
258 256
259#else /* !CONFIG_X86_LOCAL_APIC */ 257#else /* !CONFIG_X86_LOCAL_APIC */
260static inline void lapic_shutdown(void) { } 258static inline void lapic_shutdown(void) { }
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 7fe3b3060f08..a859ca461fb0 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -131,6 +131,7 @@
131#define APIC_EILVTn(n) (0x500 + 0x10 * n) 131#define APIC_EILVTn(n) (0x500 + 0x10 * n)
132#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ 132#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
133#define APIC_EILVT_NR_AMD_10H 4 133#define APIC_EILVT_NR_AMD_10H 4
134#define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H
134#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) 135#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF)
135#define APIC_EILVT_MSG_FIX 0x0 136#define APIC_EILVT_MSG_FIX 0x0
136#define APIC_EILVT_MSG_SMI 0x2 137#define APIC_EILVT_MSG_SMI 0x2
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
index 0918654305af..0d467b338835 100644
--- a/arch/x86/include/asm/calgary.h
+++ b/arch/x86/include/asm/calgary.h
@@ -62,9 +62,9 @@ struct cal_chipset_ops {
62extern int use_calgary; 62extern int use_calgary;
63 63
64#ifdef CONFIG_CALGARY_IOMMU 64#ifdef CONFIG_CALGARY_IOMMU
65extern void detect_calgary(void); 65extern int detect_calgary(void);
66#else 66#else
67static inline void detect_calgary(void) { return; } 67static inline int detect_calgary(void) { return -ENODEV; }
68#endif 68#endif
69 69
70#endif /* _ASM_X86_CALGARY_H */ 70#endif /* _ASM_X86_CALGARY_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index b185091bf19c..4fab24de26b1 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -32,6 +32,5 @@ extern void arch_unregister_cpu(int);
32 32
33DECLARE_PER_CPU(int, cpu_state); 33DECLARE_PER_CPU(int, cpu_state);
34 34
35extern unsigned int boot_cpu_id;
36 35
37#endif /* _ASM_X86_CPU_H */ 36#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3f76523589af..220e2ea08e80 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -152,10 +152,14 @@
152#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ 152#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
153#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ 153#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
154#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ 154#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
155#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ 155#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
156#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ 156#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
157#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ 157#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
158#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
159#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
158#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ 160#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
161#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
162#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
159 163
160/* 164/*
161 * Auxiliary flags: Linux defined - For features scattered in various 165 * Auxiliary flags: Linux defined - For features scattered in various
@@ -180,6 +184,13 @@
180#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ 184#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */
181#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ 185#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
182#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ 186#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
187#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
188#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
189#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */
190#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */
191#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */
192#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */
193
183 194
184/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 195/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
185#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ 196#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 733f7e91e7a9..326099199318 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -89,6 +89,16 @@
89 CFI_ADJUST_CFA_OFFSET -8 89 CFI_ADJUST_CFA_OFFSET -8
90 .endm 90 .endm
91 91
92 .macro pushfq_cfi
93 pushfq
94 CFI_ADJUST_CFA_OFFSET 8
95 .endm
96
97 .macro popfq_cfi
98 popfq
99 CFI_ADJUST_CFA_OFFSET -8
100 .endm
101
92 .macro movq_cfi reg offset=0 102 .macro movq_cfi reg offset=0
93 movq %\reg, \offset(%rsp) 103 movq %\reg, \offset(%rsp)
94 CFI_REL_OFFSET \reg, \offset 104 CFI_REL_OFFSET \reg, \offset
@@ -109,6 +119,16 @@
109 CFI_ADJUST_CFA_OFFSET -4 119 CFI_ADJUST_CFA_OFFSET -4
110 .endm 120 .endm
111 121
122 .macro pushfl_cfi
123 pushfl
124 CFI_ADJUST_CFA_OFFSET 4
125 .endm
126
127 .macro popfl_cfi
128 popfl
129 CFI_ADJUST_CFA_OFFSET -4
130 .endm
131
112 .macro movl_cfi reg offset=0 132 .macro movl_cfi reg offset=0
113 movl %\reg, \offset(%esp) 133 movl %\reg, \offset(%esp)
114 CFI_REL_OFFSET \reg, \offset 134 CFI_REL_OFFSET \reg, \offset
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 8e8ec663a98f..b8e96a18676b 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) 49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) 50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
51 51
52#ifdef CONFIG_PERF_EVENTS 52#ifdef CONFIG_IRQ_WORK
53BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) 53BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
54#endif 54#endif
55 55
56#ifdef CONFIG_X86_THERMAL_VECTOR 56#ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index d07b44f7d1dc..4d293dced62f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -214,5 +214,20 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
214 BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); 214 BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
215 return __virt_to_fix(vaddr); 215 return __virt_to_fix(vaddr);
216} 216}
217
218/* Return a pointer with the offset calculated */
219static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx,
220 phys_addr_t phys, pgprot_t flags)
221{
222 __set_fixmap(idx, phys, flags);
223 return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
224}
225
226#define set_fixmap_offset(idx, phys) \
227 __set_fixmap_offset(idx, phys, PAGE_KERNEL)
228
229#define set_fixmap_offset_nocache(idx, phys) \
230 __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
231
217#endif /* !__ASSEMBLY__ */ 232#endif /* !__ASSEMBLY__ */
218#endif /* _ASM_X86_FIXMAP_H */ 233#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 4ac5b0f33fc1..43085bfc99c3 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -17,6 +17,7 @@ extern int fix_aperture;
17#define GARTEN (1<<0) 17#define GARTEN (1<<0)
18#define DISGARTCPU (1<<4) 18#define DISGARTCPU (1<<4)
19#define DISGARTIO (1<<5) 19#define DISGARTIO (1<<5)
20#define DISTLBWALKPRB (1<<6)
20 21
21/* GART cache control register bits. */ 22/* GART cache control register bits. */
22#define INVGART (1<<0) 23#define INVGART (1<<0)
@@ -27,7 +28,6 @@ extern int fix_aperture;
27#define AMD64_GARTAPERTUREBASE 0x94 28#define AMD64_GARTAPERTUREBASE 0x94
28#define AMD64_GARTTABLEBASE 0x98 29#define AMD64_GARTTABLEBASE 0x98
29#define AMD64_GARTCACHECTL 0x9c 30#define AMD64_GARTCACHECTL 0x9c
30#define AMD64_GARTEN (1<<0)
31 31
32#ifdef CONFIG_GART_IOMMU 32#ifdef CONFIG_GART_IOMMU
33extern int gart_iommu_aperture; 33extern int gart_iommu_aperture;
@@ -37,7 +37,7 @@ extern int gart_iommu_aperture_disabled;
37extern void early_gart_iommu_check(void); 37extern void early_gart_iommu_check(void);
38extern int gart_iommu_init(void); 38extern int gart_iommu_init(void);
39extern void __init gart_parse_options(char *); 39extern void __init gart_parse_options(char *);
40extern void gart_iommu_hole_init(void); 40extern int gart_iommu_hole_init(void);
41 41
42#else 42#else
43#define gart_iommu_aperture 0 43#define gart_iommu_aperture 0
@@ -50,13 +50,27 @@ static inline void early_gart_iommu_check(void)
50static inline void gart_parse_options(char *options) 50static inline void gart_parse_options(char *options)
51{ 51{
52} 52}
53static inline void gart_iommu_hole_init(void) 53static inline int gart_iommu_hole_init(void)
54{ 54{
55 return -ENODEV;
55} 56}
56#endif 57#endif
57 58
58extern int agp_amd64_init(void); 59extern int agp_amd64_init(void);
59 60
61static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
62{
63 u32 ctl;
64
65 /*
66 * Don't enable translation but enable GART IO and CPU accesses.
67 * Also, set DISTLBWALKPRB since GART tables memory is UC.
68 */
69 ctl = DISTLBWALKPRB | order << 1;
70
71 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
72}
73
60static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) 74static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
61{ 75{
62 u32 tmp, ctl; 76 u32 tmp, ctl;
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index aeab29aee617..55e4de613f0e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,7 +14,7 @@ typedef struct {
14#endif 14#endif
15 unsigned int x86_platform_ipis; /* arch dependent */ 15 unsigned int x86_platform_ipis; /* arch dependent */
16 unsigned int apic_perf_irqs; 16 unsigned int apic_perf_irqs;
17 unsigned int apic_pending_irqs; 17 unsigned int apic_irq_work_irqs;
18#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
19 unsigned int irq_resched_count; 19 unsigned int irq_resched_count;
20 unsigned int irq_call_count; 20 unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 1d5c08a1bdfd..2c392d663dce 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -74,10 +74,12 @@ extern void hpet_disable(void);
74extern unsigned int hpet_readl(unsigned int a); 74extern unsigned int hpet_readl(unsigned int a);
75extern void force_hpet_resume(void); 75extern void force_hpet_resume(void);
76 76
77extern void hpet_msi_unmask(unsigned int irq); 77struct irq_data;
78extern void hpet_msi_mask(unsigned int irq); 78extern void hpet_msi_unmask(struct irq_data *data);
79extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); 79extern void hpet_msi_mask(struct irq_data *data);
80extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); 80struct hpet_dev;
81extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
82extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
81 83
82#ifdef CONFIG_PCI_MSI 84#ifdef CONFIG_PCI_MSI
83extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); 85extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 46c0fe05f230..0274ec5a7e62 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,7 +29,7 @@
29extern void apic_timer_interrupt(void); 29extern void apic_timer_interrupt(void);
30extern void x86_platform_ipi(void); 30extern void x86_platform_ipi(void);
31extern void error_interrupt(void); 31extern void error_interrupt(void);
32extern void perf_pending_interrupt(void); 32extern void irq_work_interrupt(void);
33 33
34extern void spurious_interrupt(void); 34extern void spurious_interrupt(void);
35extern void thermal_interrupt(void); 35extern void thermal_interrupt(void);
@@ -78,6 +78,13 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
78 irq_attr->polarity = polarity; 78 irq_attr->polarity = polarity;
79} 79}
80 80
81struct irq_2_iommu {
82 struct intel_iommu *iommu;
83 u16 irte_index;
84 u16 sub_handle;
85 u8 irte_mask;
86};
87
81/* 88/*
82 * This is performance-critical, we want to do it O(1) 89 * This is performance-critical, we want to do it O(1)
83 * 90 *
@@ -89,15 +96,17 @@ struct irq_cfg {
89 cpumask_var_t old_domain; 96 cpumask_var_t old_domain;
90 u8 vector; 97 u8 vector;
91 u8 move_in_progress : 1; 98 u8 move_in_progress : 1;
99#ifdef CONFIG_INTR_REMAP
100 struct irq_2_iommu irq_2_iommu;
101#endif
92}; 102};
93 103
94extern struct irq_cfg *irq_cfg(unsigned int);
95extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); 104extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
96extern void send_cleanup_vector(struct irq_cfg *); 105extern void send_cleanup_vector(struct irq_cfg *);
97 106
98struct irq_desc; 107struct irq_data;
99extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *, 108int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
100 unsigned int *dest_id); 109 unsigned int *dest_id);
101extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); 110extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
102extern void setup_ioapic_dest(void); 111extern void setup_ioapic_dest(void);
103 112
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index a73a8d5a5e69..4aa2bb3b242a 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf);
55extern int restore_i387_xstate_ia32(void __user *buf); 55extern int restore_i387_xstate_ia32(void __user *buf);
56#endif 56#endif
57 57
58#ifdef CONFIG_MATH_EMULATION
59extern void finit_soft_fpu(struct i387_soft_struct *soft);
60#else
61static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
62#endif
63
58#define X87_FSW_ES (1 << 7) /* Exception Summary */ 64#define X87_FSW_ES (1 << 7) /* Exception Summary */
59 65
60static __always_inline __pure bool use_xsaveopt(void) 66static __always_inline __pure bool use_xsaveopt(void)
@@ -67,6 +73,11 @@ static __always_inline __pure bool use_xsave(void)
67 return static_cpu_has(X86_FEATURE_XSAVE); 73 return static_cpu_has(X86_FEATURE_XSAVE);
68} 74}
69 75
76static __always_inline __pure bool use_fxsr(void)
77{
78 return static_cpu_has(X86_FEATURE_FXSR);
79}
80
70extern void __sanitize_i387_state(struct task_struct *); 81extern void __sanitize_i387_state(struct task_struct *);
71 82
72static inline void sanitize_i387_state(struct task_struct *tsk) 83static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -77,19 +88,11 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
77} 88}
78 89
79#ifdef CONFIG_X86_64 90#ifdef CONFIG_X86_64
80
81/* Ignore delayed exceptions from user space */
82static inline void tolerant_fwait(void)
83{
84 asm volatile("1: fwait\n"
85 "2:\n"
86 _ASM_EXTABLE(1b, 2b));
87}
88
89static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 91static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
90{ 92{
91 int err; 93 int err;
92 94
95 /* See comment in fxsave() below. */
93 asm volatile("1: rex64/fxrstor (%[fx])\n\t" 96 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
94 "2:\n" 97 "2:\n"
95 ".section .fixup,\"ax\"\n" 98 ".section .fixup,\"ax\"\n"
@@ -98,44 +101,10 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
98 ".previous\n" 101 ".previous\n"
99 _ASM_EXTABLE(1b, 3b) 102 _ASM_EXTABLE(1b, 3b)
100 : [err] "=r" (err) 103 : [err] "=r" (err)
101#if 0 /* See comment in fxsave() below. */ 104 : [fx] "R" (fx), "m" (*fx), "0" (0));
102 : [fx] "r" (fx), "m" (*fx), "0" (0));
103#else
104 : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
105#endif
106 return err; 105 return err;
107} 106}
108 107
109/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
110 is pending. Clear the x87 state here by setting it to fixed
111 values. The kernel data segment can be sometimes 0 and sometimes
112 new user value. Both should be ok.
113 Use the PDA as safe address because it should be already in L1. */
114static inline void fpu_clear(struct fpu *fpu)
115{
116 struct xsave_struct *xstate = &fpu->state->xsave;
117 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
118
119 /*
120 * xsave header may indicate the init state of the FP.
121 */
122 if (use_xsave() &&
123 !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
124 return;
125
126 if (unlikely(fx->swd & X87_FSW_ES))
127 asm volatile("fnclex");
128 alternative_input(ASM_NOP8 ASM_NOP2,
129 " emms\n" /* clear stack tags */
130 " fildl %%gs:0", /* load to clear state */
131 X86_FEATURE_FXSAVE_LEAK);
132}
133
134static inline void clear_fpu_state(struct task_struct *tsk)
135{
136 fpu_clear(&tsk->thread.fpu);
137}
138
139static inline int fxsave_user(struct i387_fxsave_struct __user *fx) 108static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
140{ 109{
141 int err; 110 int err;
@@ -149,6 +118,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
149 if (unlikely(err)) 118 if (unlikely(err))
150 return -EFAULT; 119 return -EFAULT;
151 120
121 /* See comment in fxsave() below. */
152 asm volatile("1: rex64/fxsave (%[fx])\n\t" 122 asm volatile("1: rex64/fxsave (%[fx])\n\t"
153 "2:\n" 123 "2:\n"
154 ".section .fixup,\"ax\"\n" 124 ".section .fixup,\"ax\"\n"
@@ -157,11 +127,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
157 ".previous\n" 127 ".previous\n"
158 _ASM_EXTABLE(1b, 3b) 128 _ASM_EXTABLE(1b, 3b)
159 : [err] "=r" (err), "=m" (*fx) 129 : [err] "=r" (err), "=m" (*fx)
160#if 0 /* See comment in fxsave() below. */ 130 : [fx] "R" (fx), "0" (0));
161 : [fx] "r" (fx), "0" (0));
162#else
163 : [fx] "cdaSDb" (fx), "0" (0));
164#endif
165 if (unlikely(err) && 131 if (unlikely(err) &&
166 __clear_user(fx, sizeof(struct i387_fxsave_struct))) 132 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
167 err = -EFAULT; 133 err = -EFAULT;
@@ -175,56 +141,29 @@ static inline void fpu_fxsave(struct fpu *fpu)
175 uses any extended registers for addressing, a second REX prefix 141 uses any extended registers for addressing, a second REX prefix
176 will be generated (to the assembler, rex64 followed by semicolon 142 will be generated (to the assembler, rex64 followed by semicolon
177 is a separate instruction), and hence the 64-bitness is lost. */ 143 is a separate instruction), and hence the 64-bitness is lost. */
178#if 0 144
145#ifdef CONFIG_AS_FXSAVEQ
179 /* Using "fxsaveq %0" would be the ideal choice, but is only supported 146 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
180 starting with gas 2.16. */ 147 starting with gas 2.16. */
181 __asm__ __volatile__("fxsaveq %0" 148 __asm__ __volatile__("fxsaveq %0"
182 : "=m" (fpu->state->fxsave)); 149 : "=m" (fpu->state->fxsave));
183#elif 0 150#else
184 /* Using, as a workaround, the properly prefixed form below isn't 151 /* Using, as a workaround, the properly prefixed form below isn't
185 accepted by any binutils version so far released, complaining that 152 accepted by any binutils version so far released, complaining that
186 the same type of prefix is used twice if an extended register is 153 the same type of prefix is used twice if an extended register is
187 needed for addressing (fix submitted to mainline 2005-11-21). */ 154 needed for addressing (fix submitted to mainline 2005-11-21).
188 __asm__ __volatile__("rex64/fxsave %0" 155 asm volatile("rex64/fxsave %0"
189 : "=m" (fpu->state->fxsave)); 156 : "=m" (fpu->state->fxsave));
190#else 157 This, however, we can work around by forcing the compiler to select
191 /* This, however, we can work around by forcing the compiler to select
192 an addressing mode that doesn't require extended registers. */ 158 an addressing mode that doesn't require extended registers. */
193 __asm__ __volatile__("rex64/fxsave (%1)" 159 asm volatile("rex64/fxsave (%[fx])"
194 : "=m" (fpu->state->fxsave) 160 : "=m" (fpu->state->fxsave)
195 : "cdaSDb" (&fpu->state->fxsave)); 161 : [fx] "R" (&fpu->state->fxsave));
196#endif 162#endif
197} 163}
198 164
199static inline void fpu_save_init(struct fpu *fpu)
200{
201 if (use_xsave())
202 fpu_xsave(fpu);
203 else
204 fpu_fxsave(fpu);
205
206 fpu_clear(fpu);
207}
208
209static inline void __save_init_fpu(struct task_struct *tsk)
210{
211 fpu_save_init(&tsk->thread.fpu);
212 task_thread_info(tsk)->status &= ~TS_USEDFPU;
213}
214
215#else /* CONFIG_X86_32 */ 165#else /* CONFIG_X86_32 */
216 166
217#ifdef CONFIG_MATH_EMULATION
218extern void finit_soft_fpu(struct i387_soft_struct *soft);
219#else
220static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
221#endif
222
223static inline void tolerant_fwait(void)
224{
225 asm volatile("fnclex ; fwait");
226}
227
228/* perform fxrstor iff the processor has extended states, otherwise frstor */ 167/* perform fxrstor iff the processor has extended states, otherwise frstor */
229static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 168static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
230{ 169{
@@ -241,6 +180,14 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
241 return 0; 180 return 0;
242} 181}
243 182
183static inline void fpu_fxsave(struct fpu *fpu)
184{
185 asm volatile("fxsave %[fx]"
186 : [fx] "=m" (fpu->state->fxsave));
187}
188
189#endif /* CONFIG_X86_64 */
190
244/* We need a safe address that is cheap to find and that is already 191/* We need a safe address that is cheap to find and that is already
245 in L1 during context switch. The best choices are unfortunately 192 in L1 during context switch. The best choices are unfortunately
246 different for UP and SMP */ 193 different for UP and SMP */
@@ -256,47 +203,33 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
256static inline void fpu_save_init(struct fpu *fpu) 203static inline void fpu_save_init(struct fpu *fpu)
257{ 204{
258 if (use_xsave()) { 205 if (use_xsave()) {
259 struct xsave_struct *xstate = &fpu->state->xsave;
260 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
261
262 fpu_xsave(fpu); 206 fpu_xsave(fpu);
263 207
264 /* 208 /*
265 * xsave header may indicate the init state of the FP. 209 * xsave header may indicate the init state of the FP.
266 */ 210 */
267 if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) 211 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
268 goto end; 212 return;
269 213 } else if (use_fxsr()) {
270 if (unlikely(fx->swd & X87_FSW_ES)) 214 fpu_fxsave(fpu);
271 asm volatile("fnclex"); 215 } else {
272 216 asm volatile("fsave %[fx]; fwait"
273 /* 217 : [fx] "=m" (fpu->state->fsave));
274 * we can do a simple return here or be paranoid :) 218 return;
275 */
276 goto clear_state;
277 } 219 }
278 220
279 /* Use more nops than strictly needed in case the compiler 221 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
280 varies code */ 222 asm volatile("fnclex");
281 alternative_input( 223
282 "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
283 "fxsave %[fx]\n"
284 "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
285 X86_FEATURE_FXSR,
286 [fx] "m" (fpu->state->fxsave),
287 [fsw] "m" (fpu->state->fxsave.swd) : "memory");
288clear_state:
289 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 224 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
290 is pending. Clear the x87 state here by setting it to fixed 225 is pending. Clear the x87 state here by setting it to fixed
291 values. safe_address is a random variable that should be in L1 */ 226 values. safe_address is a random variable that should be in L1 */
292 alternative_input( 227 alternative_input(
293 GENERIC_NOP8 GENERIC_NOP2, 228 ASM_NOP8 ASM_NOP2,
294 "emms\n\t" /* clear stack tags */ 229 "emms\n\t" /* clear stack tags */
295 "fildl %[addr]", /* set F?P to defined value */ 230 "fildl %P[addr]", /* set F?P to defined value */
296 X86_FEATURE_FXSAVE_LEAK, 231 X86_FEATURE_FXSAVE_LEAK,
297 [addr] "m" (safe_address)); 232 [addr] "m" (safe_address));
298end:
299 ;
300} 233}
301 234
302static inline void __save_init_fpu(struct task_struct *tsk) 235static inline void __save_init_fpu(struct task_struct *tsk)
@@ -305,9 +238,6 @@ static inline void __save_init_fpu(struct task_struct *tsk)
305 task_thread_info(tsk)->status &= ~TS_USEDFPU; 238 task_thread_info(tsk)->status &= ~TS_USEDFPU;
306} 239}
307 240
308
309#endif /* CONFIG_X86_64 */
310
311static inline int fpu_fxrstor_checking(struct fpu *fpu) 241static inline int fpu_fxrstor_checking(struct fpu *fpu)
312{ 242{
313 return fxrstor_checking(&fpu->state->fxsave); 243 return fxrstor_checking(&fpu->state->fxsave);
@@ -344,7 +274,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
344static inline void __clear_fpu(struct task_struct *tsk) 274static inline void __clear_fpu(struct task_struct *tsk)
345{ 275{
346 if (task_thread_info(tsk)->status & TS_USEDFPU) { 276 if (task_thread_info(tsk)->status & TS_USEDFPU) {
347 tolerant_fwait(); 277 /* Ignore delayed exceptions from user space */
278 asm volatile("1: fwait\n"
279 "2:\n"
280 _ASM_EXTABLE(1b, 2b));
348 task_thread_info(tsk)->status &= ~TS_USEDFPU; 281 task_thread_info(tsk)->status &= ~TS_USEDFPU;
349 stts(); 282 stts();
350 } 283 }
@@ -405,19 +338,6 @@ static inline void irq_ts_restore(int TS_state)
405 stts(); 338 stts();
406} 339}
407 340
408#ifdef CONFIG_X86_64
409
410static inline void save_init_fpu(struct task_struct *tsk)
411{
412 __save_init_fpu(tsk);
413 stts();
414}
415
416#define unlazy_fpu __unlazy_fpu
417#define clear_fpu __clear_fpu
418
419#else /* CONFIG_X86_32 */
420
421/* 341/*
422 * These disable preemption on their own and are safe 342 * These disable preemption on their own and are safe
423 */ 343 */
@@ -443,8 +363,6 @@ static inline void clear_fpu(struct task_struct *tsk)
443 preempt_enable(); 363 preempt_enable();
444} 364}
445 365
446#endif /* CONFIG_X86_64 */
447
448/* 366/*
449 * i387 state interaction 367 * i387 state interaction
450 */ 368 */
@@ -508,7 +426,4 @@ extern void fpu_finit(struct fpu *fpu);
508 426
509#endif /* __ASSEMBLY__ */ 427#endif /* __ASSEMBLY__ */
510 428
511#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
512#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
513
514#endif /* _ASM_X86_I387_H */ 429#endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 1655147646aa..a20365953bf8 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -55,6 +55,8 @@ extern struct irq_chip i8259A_chip;
55struct legacy_pic { 55struct legacy_pic {
56 int nr_legacy_irqs; 56 int nr_legacy_irqs;
57 struct irq_chip *chip; 57 struct irq_chip *chip;
58 void (*mask)(unsigned int irq);
59 void (*unmask)(unsigned int irq);
58 void (*mask_all)(void); 60 void (*mask_all)(void);
59 void (*restore_mask)(void); 61 void (*restore_mask)(void);
60 void (*init)(int auto_eoi); 62 void (*init)(int auto_eoi);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 30a3e9776123..6a45ec41ec26 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -206,6 +206,7 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
206 206
207extern void iounmap(volatile void __iomem *addr); 207extern void iounmap(volatile void __iomem *addr);
208 208
209extern void set_iounmap_nonlazy(void);
209 210
210#ifdef __KERNEL__ 211#ifdef __KERNEL__
211 212
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9cb2edb87c2f..c8be4566c3d2 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -170,12 +170,6 @@ extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
170 170
171extern void probe_nr_irqs_gsi(void); 171extern void probe_nr_irqs_gsi(void);
172 172
173extern int setup_ioapic_entry(int apic, int irq,
174 struct IO_APIC_route_entry *entry,
175 unsigned int destination, int trigger,
176 int polarity, int vector, int pin);
177extern void ioapic_write_entry(int apic, int pin,
178 struct IO_APIC_route_entry e);
179extern void setup_ioapic_ids_from_mpc(void); 173extern void setup_ioapic_ids_from_mpc(void);
180 174
181struct mp_ioapic_gsi{ 175struct mp_ioapic_gsi{
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
new file mode 100644
index 000000000000..f229b13a5f30
--- /dev/null
+++ b/arch/x86/include/asm/iommu_table.h
@@ -0,0 +1,100 @@
1#ifndef _ASM_X86_IOMMU_TABLE_H
2#define _ASM_X86_IOMMU_TABLE_H
3
4#include <asm/swiotlb.h>
5
6/*
7 * History lesson:
8 * The execution chain of IOMMUs in 2.6.36 looks as so:
9 *
10 * [xen-swiotlb]
11 * |
12 * +----[swiotlb *]--+
13 * / | \
14 * / | \
15 * [GART] [Calgary] [Intel VT-d]
16 * /
17 * /
18 * [AMD-Vi]
19 *
20 * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip
21 * over the rest of IOMMUs and unconditionally initialize the SWIOTLB.
 22 * Also it would surreptitiously set swiotlb=1 if there were
23 * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb
24 * flag would be turned off by all IOMMUs except the Calgary one.
25 *
26 * The IOMMU_INIT* macros allow a similar tree (or more complex if desired)
27 * to be built by defining who we depend on.
28 *
29 * And all that needs to be done is to use one of the macros in the IOMMU
30 * and the pci-dma.c will take care of the rest.
31 */
32
33struct iommu_table_entry {
34 initcall_t detect;
35 initcall_t depend;
 36 void (*early_init)(void); /* No memory allocator available. */
37 void (*late_init)(void); /* Yes, can allocate memory. */
38#define IOMMU_FINISH_IF_DETECTED (1<<0)
39#define IOMMU_DETECTED (1<<1)
40 int flags;
41};
42/*
43 * Macro fills out an entry in the .iommu_table that is equivalent
44 * to the fields that 'struct iommu_table_entry' has. The entries
45 * that are put in the .iommu_table section are not put in any order
46 * hence during boot-time we will have to resort them based on
47 * dependency. */
48
49
50#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\
51 static const struct iommu_table_entry const \
52 __iommu_entry_##_detect __used \
53 __attribute__ ((unused, __section__(".iommu_table"), \
54 aligned((sizeof(void *))))) \
55 = {_detect, _depend, _early_init, _late_init, \
56 _finish ? IOMMU_FINISH_IF_DETECTED : 0}
57/*
58 * The simplest IOMMU definition. Provide the detection routine
59 * and it will be run after the SWIOTLB and the other IOMMUs
60 * that utilize this macro. If the IOMMU is detected (ie, the
61 * detect routine returns a positive value), the other IOMMUs
62 * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer
63 * to stop detecting the other IOMMUs after yours has been detected.
64 */
65#define IOMMU_INIT_POST(_detect) \
66 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0)
67
 68#define IOMMU_INIT_POST_FINISH(_detect) \
69 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1)
70
71/*
72 * A more sophisticated version of IOMMU_INIT. This variant requires:
73 * a). A detection routine function.
74 * b). The name of the detection routine we depend on to get called
75 * before us.
76 * c). The init routine which gets called if the detection routine
77 * returns a positive value from the pci_iommu_alloc. This means
78 * no presence of a memory allocator.
79 * d). Similar to the 'init', except that this gets called from pci_iommu_init
80 * where we do have a memory allocator.
81 *
 82 * The standard vs the _FINISH differs in that the standard variant will
 83 * continue detecting other IOMMUs in the call list after the
 84 * detection routine returns a positive number, while the _FINISH variant
 85 * will stop the execution chain. Both will still call the 'init' and
86 * 'late_init' functions if they are set.
87 */
88#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \
89 __IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
90
91#define IOMMU_INIT(_detect, _depend, _init, _late_init) \
92 __IOMMU_INIT(_detect, _depend, _init, _late_init, 0)
93
94void sort_iommu_table(struct iommu_table_entry *start,
95 struct iommu_table_entry *finish);
96
97void check_iommu_entries(struct iommu_table_entry *start,
98 struct iommu_table_entry *finish);
99
100#endif /* _ASM_X86_IOMMU_TABLE_H */
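
The comments above describe the registration contract, but nothing in this header shows a caller; the following is a minimal sketch of how a hypothetical IOMMU driver could hook into the table. The driver name and the detect/init functions are illustrative assumptions and not part of this patch; only IOMMU_INIT_FINISH() and pci_swiotlb_detect_4gb() come from the code in this series.

/* Sketch: register a made-up IOMMU with the boot-time detection chain. */
#include <linux/init.h>
#include <asm/iommu_table.h>
#include <asm/swiotlb.h>

static int __init example_iommu_detect(void)
{
	/* probe the hardware; a positive return value marks it detected */
	return 0;
}

static void __init example_iommu_early_init(void)
{
	/* called from pci_iommu_alloc(), before memory allocators exist */
}

static void __init example_iommu_late_init(void)
{
	/* called from pci_iommu_init(), memory allocation is available */
}

/*
 * Run after the 4GB swiotlb check; because this is the _FINISH variant,
 * a positive detect result stops probing the remaining IOMMUs.
 */
IOMMU_INIT_FINISH(example_iommu_detect,
		  pci_swiotlb_detect_4gb,
		  example_iommu_early_init,
		  example_iommu_late_init);
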
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index f275e2244505..1c23360fb2d8 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -3,4 +3,39 @@
3 3
4#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) 4#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
5 5
6#ifdef CONFIG_INTR_REMAP
7static inline void prepare_irte(struct irte *irte, int vector,
8 unsigned int dest)
9{
10 memset(irte, 0, sizeof(*irte));
11
12 irte->present = 1;
13 irte->dst_mode = apic->irq_dest_mode;
14 /*
15 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
16 * actual level or edge trigger will be setup in the IO-APIC
17 * RTE. This will help simplify level triggered irq migration.
 18 * For more details, see the comments (in io_apic.c) explaining IO-APIC
19 * irq migration in the presence of interrupt-remapping.
20 */
21 irte->trigger_mode = 0;
22 irte->dlvry_mode = apic->irq_delivery_mode;
23 irte->vector = vector;
24 irte->dest_id = IRTE_DEST(dest);
25 irte->redir_hint = 1;
26}
27static inline bool irq_remapped(struct irq_cfg *cfg)
28{
29 return cfg->irq_2_iommu.iommu != NULL;
30}
31#else
 32static inline void prepare_irte(struct irte *irte, int vector, unsigned int dest)
33{
34}
35static inline bool irq_remapped(struct irq_cfg *cfg)
36{
37 return false;
38}
39#endif
40
6#endif /* _ASM_X86_IRQ_REMAPPING_H */ 41#endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e2ca30092557..6af0894dafb4 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -114,9 +114,9 @@
114#define X86_PLATFORM_IPI_VECTOR 0xed 114#define X86_PLATFORM_IPI_VECTOR 0xed
115 115
116/* 116/*
117 * Performance monitoring pending work vector: 117 * IRQ work vector:
118 */ 118 */
119#define LOCAL_PENDING_VECTOR 0xec 119#define IRQ_WORK_VECTOR 0xec
120 120
121#define UV_BAU_MESSAGE 0xea 121#define UV_BAU_MESSAGE 0xea
122 122
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
new file mode 100644
index 000000000000..f52d42e80585
--- /dev/null
+++ b/arch/x86/include/asm/jump_label.h
@@ -0,0 +1,37 @@
1#ifndef _ASM_X86_JUMP_LABEL_H
2#define _ASM_X86_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/nops.h>
8
9#define JUMP_LABEL_NOP_SIZE 5
10
11# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
12
13# define JUMP_LABEL(key, label) \
14 do { \
15 asm goto("1:" \
16 JUMP_LABEL_INITIAL_NOP \
17 ".pushsection __jump_table, \"a\" \n\t"\
18 _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
19 ".popsection \n\t" \
20 : : "i" (key) : : label); \
21 } while (0)
22
23#endif /* __KERNEL__ */
24
25#ifdef CONFIG_X86_64
26typedef u64 jump_label_t;
27#else
28typedef u32 jump_label_t;
29#endif
30
31struct jump_entry {
32 jump_label_t code;
33 jump_label_t target;
34 jump_label_t key;
35};
36
37#endif
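
The JUMP_LABEL() macro above emits a 5-byte NOP at the call site and records the site in the __jump_table section, so the generic jump label code can later patch the NOP into a jump to the given label. A minimal sketch of the intended call pattern follows; the key variable, do_trace() and the surrounding function are illustrative assumptions, not part of this patch, and enabling/disabling the key is handled by the generic <linux/jump_label.h> code added elsewhere in this series.

/* Sketch: a nearly-free tracing hook built on JUMP_LABEL(). */
#include <linux/jump_label.h>

static int example_trace_enabled;	/* static key; its address must be a link-time constant */

static void do_trace(void)
{
	/* slow path, only reached once the key has been enabled */
}

static void example_hook(void)
{
	JUMP_LABEL(&example_trace_enabled, trace);
	return;		/* fast path: the NOP falls straight through to here */
trace:
	do_trace();
}
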
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 502e53f999cf..c52e2eb40a1e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -652,20 +652,6 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
652 return (struct kvm_mmu_page *)page_private(page); 652 return (struct kvm_mmu_page *)page_private(page);
653} 653}
654 654
655static inline u16 kvm_read_fs(void)
656{
657 u16 seg;
658 asm("mov %%fs, %0" : "=g"(seg));
659 return seg;
660}
661
662static inline u16 kvm_read_gs(void)
663{
664 u16 seg;
665 asm("mov %%gs, %0" : "=g"(seg));
666 return seg;
667}
668
669static inline u16 kvm_read_ldt(void) 655static inline u16 kvm_read_ldt(void)
670{ 656{
671 u16 ldt; 657 u16 ldt;
@@ -673,16 +659,6 @@ static inline u16 kvm_read_ldt(void)
673 return ldt; 659 return ldt;
674} 660}
675 661
676static inline void kvm_load_fs(u16 sel)
677{
678 asm("mov %0, %%fs" : : "rm"(sel));
679}
680
681static inline void kvm_load_gs(u16 sel)
682{
683 asm("mov %0, %%gs" : : "rm"(sel));
684}
685
686static inline void kvm_load_ldt(u16 sel) 662static inline void kvm_load_ldt(u16 sel)
687{ 663{
688 asm("lldt %0" : : "rm"(sel)); 664 asm("lldt %0" : : "rm"(sel));
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 16350740edf6..4a711a684b17 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -10,6 +10,9 @@
10 */ 10 */
11#ifndef _ASM_X86_MRST_H 11#ifndef _ASM_X86_MRST_H
12#define _ASM_X86_MRST_H 12#define _ASM_X86_MRST_H
13
14#include <linux/sfi.h>
15
13extern int pci_mrst_init(void); 16extern int pci_mrst_init(void);
14int __init sfi_parse_mrtc(struct sfi_table_header *table); 17int __init sfi_parse_mrtc(struct sfi_table_header *table);
15 18
@@ -26,7 +29,7 @@ enum mrst_cpu_type {
26}; 29};
27 30
28extern enum mrst_cpu_type __mrst_cpu_chip; 31extern enum mrst_cpu_type __mrst_cpu_chip;
29static enum mrst_cpu_type mrst_identify_cpu(void) 32static inline enum mrst_cpu_type mrst_identify_cpu(void)
30{ 33{
31 return __mrst_cpu_chip; 34 return __mrst_cpu_chip;
32} 35}
@@ -42,4 +45,9 @@ extern enum mrst_timer_options mrst_timer_options;
42#define SFI_MTMR_MAX_NUM 8 45#define SFI_MTMR_MAX_NUM 8
43#define SFI_MRTC_MAX 8 46#define SFI_MRTC_MAX 8
44 47
48extern struct console early_mrst_console;
49extern void mrst_early_console_init(void);
50
51extern struct console early_hsu_console;
52extern void hsu_early_console_init(void);
45#endif /* _ASM_X86_MRST_H */ 53#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
new file mode 100644
index 000000000000..bcdff997668c
--- /dev/null
+++ b/arch/x86/include/asm/mwait.h
@@ -0,0 +1,15 @@
1#ifndef _ASM_X86_MWAIT_H
2#define _ASM_X86_MWAIT_H
3
4#define MWAIT_SUBSTATE_MASK 0xf
5#define MWAIT_CSTATE_MASK 0xf
6#define MWAIT_SUBSTATE_SIZE 4
7#define MWAIT_MAX_NUM_CSTATES 8
8
9#define CPUID_MWAIT_LEAF 5
10#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
11#define CPUID5_ECX_INTERRUPT_BREAK 0x2
12
13#define MWAIT_ECX_INTERRUPT_BREAK 0x1
14
15#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index 08fde475cb3b..2a8478140bb3 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -21,10 +21,14 @@ extern void olpc_ofw_detect(void);
21/* install OFW's pde permanently into the kernel's pgtable */ 21/* install OFW's pde permanently into the kernel's pgtable */
22extern void setup_olpc_ofw_pgd(void); 22extern void setup_olpc_ofw_pgd(void);
23 23
24/* check if OFW was detected during boot */
25extern bool olpc_ofw_present(void);
26
24#else /* !CONFIG_OLPC_OPENFIRMWARE */ 27#else /* !CONFIG_OLPC_OPENFIRMWARE */
25 28
26static inline void olpc_ofw_detect(void) { } 29static inline void olpc_ofw_detect(void) { }
27static inline void setup_olpc_ofw_pgd(void) { } 30static inline void setup_olpc_ofw_pgd(void) { }
31static inline bool olpc_ofw_present(void) { return false; }
28 32
29#endif /* !CONFIG_OLPC_OPENFIRMWARE */ 33#endif /* !CONFIG_OLPC_OPENFIRMWARE */
30 34
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index a667f24c7254..1df66211fd1b 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -8,7 +8,7 @@
8#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) 8#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
9#define PAGE_MASK (~(PAGE_SIZE-1)) 9#define PAGE_MASK (~(PAGE_SIZE-1))
10 10
11#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) 11#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
12#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) 12#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
13 13
14/* Cast PAGE_MASK to a signed type so that it is sign-extended if 14/* Cast PAGE_MASK to a signed type so that it is sign-extended if
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 499954c530da..18e3b8a8709f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -416,11 +416,6 @@ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
416 PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); 416 PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
417} 417}
418 418
419static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
420 unsigned long start, unsigned long count)
421{
422 PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
423}
424static inline void paravirt_release_pmd(unsigned long pfn) 419static inline void paravirt_release_pmd(unsigned long pfn)
425{ 420{
426 PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); 421 PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index db9ef5532341..b82bac975250 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -255,7 +255,6 @@ struct pv_mmu_ops {
255 */ 255 */
256 void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); 256 void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
257 void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); 257 void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
258 void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
259 void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); 258 void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
260 void (*release_pte)(unsigned long pfn); 259 void (*release_pte)(unsigned long pfn);
261 void (*release_pmd)(unsigned long pfn); 260 void (*release_pmd)(unsigned long pfn);
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index def500776b16..a70cd216be5d 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -36,19 +36,6 @@
36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) 36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) 37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
38 38
39/* Non HT mask */
40#define P4_ESCR_MASK \
41 (P4_ESCR_EVENT_MASK | \
42 P4_ESCR_EVENTMASK_MASK | \
43 P4_ESCR_TAG_MASK | \
44 P4_ESCR_TAG_ENABLE | \
45 P4_ESCR_T0_OS | \
46 P4_ESCR_T0_USR)
47
48/* HT mask */
49#define P4_ESCR_MASK_HT \
50 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
51
52#define P4_CCCR_OVF 0x80000000U 39#define P4_CCCR_OVF 0x80000000U
53#define P4_CCCR_CASCADE 0x40000000U 40#define P4_CCCR_CASCADE 0x40000000U
54#define P4_CCCR_OVF_PMI_T0 0x04000000U 41#define P4_CCCR_OVF_PMI_T0 0x04000000U
@@ -70,23 +57,6 @@
70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 57#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 58#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
72 59
73/* Non HT mask */
74#define P4_CCCR_MASK \
75 (P4_CCCR_OVF | \
76 P4_CCCR_CASCADE | \
77 P4_CCCR_OVF_PMI_T0 | \
78 P4_CCCR_FORCE_OVF | \
79 P4_CCCR_EDGE | \
80 P4_CCCR_THRESHOLD_MASK | \
81 P4_CCCR_COMPLEMENT | \
82 P4_CCCR_COMPARE | \
83 P4_CCCR_ESCR_SELECT_MASK | \
84 P4_CCCR_ENABLE)
85
86/* HT mask */
87#define P4_CCCR_MASK_HT \
88 (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
89
90#define P4_GEN_ESCR_EMASK(class, name, bit) \ 60#define P4_GEN_ESCR_EMASK(class, name, bit) \
91 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) 61 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
92#define P4_ESCR_EMASK_BIT(class, name) class##__##name 62#define P4_ESCR_EMASK_BIT(class, name) class##__##name
@@ -127,6 +97,28 @@
127#define P4_CONFIG_HT_SHIFT 63 97#define P4_CONFIG_HT_SHIFT 63
128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 98#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
129 99
100/*
101 * The bits we allow to pass for RAW events
102 */
103#define P4_CONFIG_MASK_ESCR \
104 P4_ESCR_EVENT_MASK | \
105 P4_ESCR_EVENTMASK_MASK | \
106 P4_ESCR_TAG_MASK | \
107 P4_ESCR_TAG_ENABLE
108
109#define P4_CONFIG_MASK_CCCR \
110 P4_CCCR_EDGE | \
111 P4_CCCR_THRESHOLD_MASK | \
112 P4_CCCR_COMPLEMENT | \
113 P4_CCCR_COMPARE | \
114 P4_CCCR_THREAD_ANY | \
115 P4_CCCR_RESERVED
116
117/* some dangerous bits are reserved for kernel internals */
118#define P4_CONFIG_MASK \
119 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
120 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
121
130static inline bool p4_is_event_cascaded(u64 config) 122static inline bool p4_is_event_cascaded(u64 config)
131{ 123{
132 u32 cccr = p4_config_unpack_cccr(config); 124 u32 cccr = p4_config_unpack_cccr(config);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34c785c5a63..ada823a13c7c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
28extern spinlock_t pgd_lock; 28extern spinlock_t pgd_lock;
29extern struct list_head pgd_list; 29extern struct list_head pgd_list;
30 30
31extern struct mm_struct *pgd_page_get_mm(struct page *page);
32
31#ifdef CONFIG_PARAVIRT 33#ifdef CONFIG_PARAVIRT
32#include <asm/paravirt.h> 34#include <asm/paravirt.h>
33#else /* !CONFIG_PARAVIRT */ 35#else /* !CONFIG_PARAVIRT */
@@ -603,6 +605,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
603 pte_update(mm, addr, ptep); 605 pte_update(mm, addr, ptep);
604} 606}
605 607
608#define flush_tlb_fix_spurious_fault(vma, address)
609
606/* 610/*
607 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); 611 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
608 * 612 *
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 076052cd62be..f96ac9bedf75 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
102 native_set_pgd(pgd, native_make_pgd(0)); 102 native_set_pgd(pgd, native_make_pgd(0));
103} 103}
104 104
105extern void sync_global_pgds(unsigned long start, unsigned long end);
106
105/* 107/*
106 * Conversion functions: convert a page and protection to a page entry, 108 * Conversion functions: convert a page and protection to a page entry,
107 * and a page entry and page directory to the page they refer to. 109 * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 325b7bdbebaa..cae9c3cb95cf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -110,6 +110,8 @@ struct cpuinfo_x86 {
110 u16 phys_proc_id; 110 u16 phys_proc_id;
111 /* Core id: */ 111 /* Core id: */
112 u16 cpu_core_id; 112 u16 cpu_core_id;
113 /* Compute unit id */
114 u8 compute_unit_id;
113 /* Index into per_cpu list: */ 115 /* Index into per_cpu list: */
114 u16 cpu_index; 116 u16 cpu_index;
115#endif 117#endif
@@ -602,7 +604,7 @@ extern unsigned long mmu_cr4_features;
602 604
603static inline void set_in_cr4(unsigned long mask) 605static inline void set_in_cr4(unsigned long mask)
604{ 606{
605 unsigned cr4; 607 unsigned long cr4;
606 608
607 mmu_cr4_features |= mask; 609 mmu_cr4_features |= mask;
608 cr4 = read_cr4(); 610 cr4 = read_cr4();
@@ -612,7 +614,7 @@ static inline void set_in_cr4(unsigned long mask)
612 614
613static inline void clear_in_cr4(unsigned long mask) 615static inline void clear_in_cr4(unsigned long mask)
614{ 616{
615 unsigned cr4; 617 unsigned long cr4;
616 618
617 mmu_cr4_features &= ~mask; 619 mmu_cr4_features &= ~mask;
618 cr4 = read_cr4(); 620 cr4 = read_cr4();
@@ -764,29 +766,6 @@ extern unsigned long idle_halt;
764extern unsigned long idle_nomwait; 766extern unsigned long idle_nomwait;
765extern bool c1e_detected; 767extern bool c1e_detected;
766 768
767/*
768 * on systems with caches, caches must be flashed as the absolute
769 * last instruction before going into a suspended halt. Otherwise,
770 * dirty data can linger in the cache and become stale on resume,
771 * leading to strange errors.
772 *
773 * perform a variety of operations to guarantee that the compiler
774 * will not reorder instructions. wbinvd itself is serializing
775 * so the processor will not reorder.
776 *
777 * Systems without cache can just go into halt.
778 */
779static inline void wbinvd_halt(void)
780{
781 mb();
782 /* check for clflush to determine if wbinvd is legal */
783 if (cpu_has_clflush)
784 asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
785 else
786 while (1)
787 halt();
788}
789
790extern void enable_sep_cpu(void); 769extern void enable_sep_cpu(void);
791extern int sysenter_setup(void); 770extern int sysenter_setup(void);
792 771
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ef292c792d74..d6763b139a84 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -93,6 +93,11 @@ void *extend_brk(size_t size, size_t align);
93 : : "i" (sz)); \ 93 : : "i" (sz)); \
94 } 94 }
95 95
96/* Helper for reserving space for arrays of things */
97#define RESERVE_BRK_ARRAY(type, name, entries) \
98 type *name; \
99 RESERVE_BRK(name, sizeof(type) * entries)
100
96#ifdef __i386__ 101#ifdef __i386__
97 102
98void __init i386_start_kernel(void); 103void __init i386_start_kernel(void);
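
RESERVE_BRK_ARRAY() above only declares a pointer and reserves matching brk space under the same name; the pointer still has to be pointed at that space with extend_brk() during early boot. A small sketch of that pattern; the entry type, array name and size are illustrative assumptions, not part of this patch.

/* Sketch: reserve early-boot space for a fixed-size table. */
#include <linux/types.h>
#include <linux/init.h>
#include <asm/setup.h>

struct example_entry {
	u32 from;
	u32 to;
};

#define EXAMPLE_ENTRIES 64

/* declares 'struct example_entry *example_table' and reserves the brk space */
RESERVE_BRK_ARRAY(struct example_entry, example_table, EXAMPLE_ENTRIES);

static void __init example_table_init(void)
{
	/* hand out the reserved space and make the pointer usable */
	example_table = extend_brk(sizeof(*example_table) * EXAMPLE_ENTRIES,
				   sizeof(struct example_entry));
}
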
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 8085277e1b8b..977f1761a25d 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -5,17 +5,26 @@
5 5
6#ifdef CONFIG_SWIOTLB 6#ifdef CONFIG_SWIOTLB
7extern int swiotlb; 7extern int swiotlb;
8extern int __init pci_swiotlb_detect(void); 8extern int __init pci_swiotlb_detect_override(void);
9extern int __init pci_swiotlb_detect_4gb(void);
9extern void __init pci_swiotlb_init(void); 10extern void __init pci_swiotlb_init(void);
11extern void __init pci_swiotlb_late_init(void);
10#else 12#else
11#define swiotlb 0 13#define swiotlb 0
12static inline int pci_swiotlb_detect(void) 14static inline int pci_swiotlb_detect_override(void)
15{
16 return 0;
17}
18static inline int pci_swiotlb_detect_4gb(void)
13{ 19{
14 return 0; 20 return 0;
15} 21}
16static inline void pci_swiotlb_init(void) 22static inline void pci_swiotlb_init(void)
17{ 23{
18} 24}
25static inline void pci_swiotlb_late_init(void)
26{
27}
19#endif 28#endif
20 29
21static inline void dma_mark_clean(void *addr, size_t size) {} 30static inline void dma_mark_clean(void *addr, size_t size) {}
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h
deleted file mode 100644
index 61e08c0a2907..000000000000
--- a/arch/x86/include/asm/vmi.h
+++ /dev/null
@@ -1,269 +0,0 @@
1/*
2 * VMI interface definition
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Maintained by: Zachary Amsden zach@vmware.com
22 *
23 */
24#include <linux/types.h>
25
26/*
27 *---------------------------------------------------------------------
28 *
29 * VMI Option ROM API
30 *
31 *---------------------------------------------------------------------
32 */
33#define VMI_SIGNATURE 0x696d5663 /* "cVmi" */
34
35#define PCI_VENDOR_ID_VMWARE 0x15AD
36#define PCI_DEVICE_ID_VMWARE_VMI 0x0801
37
38/*
39 * We use two version numbers for compatibility, with the major
40 * number signifying interface breakages, and the minor number
41 * interface extensions.
42 */
43#define VMI_API_REV_MAJOR 3
44#define VMI_API_REV_MINOR 0
45
46#define VMI_CALL_CPUID 0
47#define VMI_CALL_WRMSR 1
48#define VMI_CALL_RDMSR 2
49#define VMI_CALL_SetGDT 3
50#define VMI_CALL_SetLDT 4
51#define VMI_CALL_SetIDT 5
52#define VMI_CALL_SetTR 6
53#define VMI_CALL_GetGDT 7
54#define VMI_CALL_GetLDT 8
55#define VMI_CALL_GetIDT 9
56#define VMI_CALL_GetTR 10
57#define VMI_CALL_WriteGDTEntry 11
58#define VMI_CALL_WriteLDTEntry 12
59#define VMI_CALL_WriteIDTEntry 13
60#define VMI_CALL_UpdateKernelStack 14
61#define VMI_CALL_SetCR0 15
62#define VMI_CALL_SetCR2 16
63#define VMI_CALL_SetCR3 17
64#define VMI_CALL_SetCR4 18
65#define VMI_CALL_GetCR0 19
66#define VMI_CALL_GetCR2 20
67#define VMI_CALL_GetCR3 21
68#define VMI_CALL_GetCR4 22
69#define VMI_CALL_WBINVD 23
70#define VMI_CALL_SetDR 24
71#define VMI_CALL_GetDR 25
72#define VMI_CALL_RDPMC 26
73#define VMI_CALL_RDTSC 27
74#define VMI_CALL_CLTS 28
75#define VMI_CALL_EnableInterrupts 29
76#define VMI_CALL_DisableInterrupts 30
77#define VMI_CALL_GetInterruptMask 31
78#define VMI_CALL_SetInterruptMask 32
79#define VMI_CALL_IRET 33
80#define VMI_CALL_SYSEXIT 34
81#define VMI_CALL_Halt 35
82#define VMI_CALL_Reboot 36
83#define VMI_CALL_Shutdown 37
84#define VMI_CALL_SetPxE 38
85#define VMI_CALL_SetPxELong 39
86#define VMI_CALL_UpdatePxE 40
87#define VMI_CALL_UpdatePxELong 41
88#define VMI_CALL_MachineToPhysical 42
89#define VMI_CALL_PhysicalToMachine 43
90#define VMI_CALL_AllocatePage 44
91#define VMI_CALL_ReleasePage 45
92#define VMI_CALL_InvalPage 46
93#define VMI_CALL_FlushTLB 47
94#define VMI_CALL_SetLinearMapping 48
95
96#define VMI_CALL_SetIOPLMask 61
97#define VMI_CALL_SetInitialAPState 62
98#define VMI_CALL_APICWrite 63
99#define VMI_CALL_APICRead 64
100#define VMI_CALL_IODelay 65
101#define VMI_CALL_SetLazyMode 73
102
103/*
104 *---------------------------------------------------------------------
105 *
106 * MMU operation flags
107 *
108 *---------------------------------------------------------------------
109 */
110
111/* Flags used by VMI_{Allocate|Release}Page call */
112#define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */
113#define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */
114#define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */
115
116
117/* Flags shared by Allocate|Release Page and PTE updates */
118#define VMI_PAGE_PT 0x01
119#define VMI_PAGE_PD 0x02
120#define VMI_PAGE_PDP 0x04
121#define VMI_PAGE_PML4 0x08
122
123#define VMI_PAGE_NORMAL 0x00 /* for debugging */
124
125/* Flags used by PTE updates */
126#define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */
127#define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */
128#define VMI_PAGE_VA_MASK 0xfffff000
129
130#ifdef CONFIG_X86_PAE
131#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED)
132#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED)
133#else
134#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED)
135#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED)
136#endif
137
138/* Flags used by VMI_FlushTLB call */
139#define VMI_FLUSH_TLB 0x01
140#define VMI_FLUSH_GLOBAL 0x02
141
142/*
143 *---------------------------------------------------------------------
144 *
145 * VMI relocation definitions for ROM call get_reloc
146 *
147 *---------------------------------------------------------------------
148 */
149
150/* VMI Relocation types */
151#define VMI_RELOCATION_NONE 0
152#define VMI_RELOCATION_CALL_REL 1
153#define VMI_RELOCATION_JUMP_REL 2
154#define VMI_RELOCATION_NOP 3
155
156#ifndef __ASSEMBLY__
157struct vmi_relocation_info {
158 unsigned char *eip;
159 unsigned char type;
160 unsigned char reserved[3];
161};
162#endif
163
164
165/*
166 *---------------------------------------------------------------------
167 *
168 * Generic ROM structures and definitions
169 *
170 *---------------------------------------------------------------------
171 */
172
173#ifndef __ASSEMBLY__
174
175struct vrom_header {
176 u16 rom_signature; /* option ROM signature */
177 u8 rom_length; /* ROM length in 512 byte chunks */
178 u8 rom_entry[4]; /* 16-bit code entry point */
179 u8 rom_pad0; /* 4-byte align pad */
180 u32 vrom_signature; /* VROM identification signature */
181 u8 api_version_min;/* Minor version of API */
182 u8 api_version_maj;/* Major version of API */
183 u8 jump_slots; /* Number of jump slots */
184 u8 reserved1; /* Reserved for expansion */
185 u32 virtual_top; /* Hypervisor virtual address start */
186 u16 reserved2; /* Reserved for expansion */
187 u16 license_offs; /* Offset to License string */
188 u16 pci_header_offs;/* Offset to PCI OPROM header */
189 u16 pnp_header_offs;/* Offset to PnP OPROM header */
190 u32 rom_pad3; /* PnP reserved / VMI reserved */
191 u8 reserved[96]; /* Reserved for headers */
192 char vmi_init[8]; /* VMI_Init jump point */
193 char get_reloc[8]; /* VMI_GetRelocationInfo jump point */
194} __attribute__((packed));
195
196struct pnp_header {
197 char sig[4];
198 char rev;
199 char size;
200 short next;
201 short res;
202 long devID;
203 unsigned short manufacturer_offset;
204 unsigned short product_offset;
205} __attribute__((packed));
206
207struct pci_header {
208 char sig[4];
209 short vendorID;
210 short deviceID;
211 short vpdData;
212 short size;
213 char rev;
214 char class;
215 char subclass;
216 char interface;
217 short chunks;
218 char rom_version_min;
219 char rom_version_maj;
220 char codetype;
221 char lastRom;
222 short reserved;
223} __attribute__((packed));
224
225/* Function prototypes for bootstrapping */
226#ifdef CONFIG_VMI
227extern void vmi_init(void);
228extern void vmi_activate(void);
229extern void vmi_bringup(void);
230#else
231static inline void vmi_init(void) {}
232static inline void vmi_activate(void) {}
233static inline void vmi_bringup(void) {}
234#endif
235
236/* State needed to start an application processor in an SMP system. */
237struct vmi_ap_state {
238 u32 cr0;
239 u32 cr2;
240 u32 cr3;
241 u32 cr4;
242
243 u64 efer;
244
245 u32 eip;
246 u32 eflags;
247 u32 eax;
248 u32 ebx;
249 u32 ecx;
250 u32 edx;
251 u32 esp;
252 u32 ebp;
253 u32 esi;
254 u32 edi;
255 u16 cs;
256 u16 ss;
257 u16 ds;
258 u16 es;
259 u16 fs;
260 u16 gs;
261 u16 ldtr;
262
263 u16 gdtr_limit;
264 u32 gdtr_base;
265 u32 idtr_base;
266 u16 idtr_limit;
267};
268
269#endif
diff --git a/arch/x86/include/asm/vmi_time.h b/arch/x86/include/asm/vmi_time.h
deleted file mode 100644
index c6e0bee93e3c..000000000000
--- a/arch/x86/include/asm/vmi_time.h
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * VMI Time wrappers
3 *
4 * Copyright (C) 2006, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25#ifndef _ASM_X86_VMI_TIME_H
26#define _ASM_X86_VMI_TIME_H
27
28/*
29 * Raw VMI call indices for timer functions
30 */
31#define VMI_CALL_GetCycleFrequency 66
32#define VMI_CALL_GetCycleCounter 67
33#define VMI_CALL_SetAlarm 68
34#define VMI_CALL_CancelAlarm 69
35#define VMI_CALL_GetWallclockTime 70
36#define VMI_CALL_WallclockUpdated 71
37
38/* Cached VMI timer operations */
39extern struct vmi_timer_ops {
40 u64 (*get_cycle_frequency)(void);
41 u64 (*get_cycle_counter)(int);
42 u64 (*get_wallclock)(void);
43 int (*wallclock_updated)(void);
44 void (*set_alarm)(u32 flags, u64 expiry, u64 period);
45 void (*cancel_alarm)(u32 flags);
46} vmi_timer_ops;
47
48/* Prototypes */
49extern void __init vmi_time_init(void);
50extern unsigned long vmi_get_wallclock(void);
51extern int vmi_set_wallclock(unsigned long now);
52extern unsigned long long vmi_sched_clock(void);
53extern unsigned long vmi_tsc_khz(void);
54
55#ifdef CONFIG_X86_LOCAL_APIC
56extern void __devinit vmi_time_bsp_init(void);
57extern void __devinit vmi_time_ap_init(void);
58#endif
59
60/*
61 * When run under a hypervisor, a vcpu is always in one of three states:
62 * running, halted, or ready. The vcpu is in the 'running' state if it
63 * is executing. When the vcpu executes the halt interface, the vcpu
64 * enters the 'halted' state and remains halted until there is some work
65 * pending for the vcpu (e.g. an alarm expires, host I/O completes on
66 * behalf of virtual I/O). At this point, the vcpu enters the 'ready'
67 * state (waiting for the hypervisor to reschedule it). Finally, at any
68 * time when the vcpu is not in the 'running' state nor the 'halted'
69 * state, it is in the 'ready' state.
70 *
71 * Real time advances while the vcpu is 'running', 'ready', or
72 * 'halted'. Stolen time is the time in which the vcpu is in the
73 * 'ready' state. Available time is the remaining time -- the vcpu is
74 * either 'running' or 'halted'.
75 *
76 * All three views of time are accessible through the VMI cycle
77 * counters.
78 */
79
80/* The cycle counters. */
81#define VMI_CYCLES_REAL 0
82#define VMI_CYCLES_AVAILABLE 1
83#define VMI_CYCLES_STOLEN 2
84
85/* The alarm interface 'flags' bits */
86#define VMI_ALARM_COUNTERS 2
87
88#define VMI_ALARM_COUNTER_MASK 0x000000ff
89
90#define VMI_ALARM_WIRED_IRQ0 0x00000000
91#define VMI_ALARM_WIRED_LVTT 0x00010000
92
93#define VMI_ALARM_IS_ONESHOT 0x00000000
94#define VMI_ALARM_IS_PERIODIC 0x00000100
95
96#define CONFIG_VMI_ALARM_HZ 100
97
98#endif /* _ASM_X86_VMI_TIME_H */
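
The header removed above documents three views of vcpu time (real, available, and stolen) and exposes them through the VMI_CYCLES_* counters and the cached vmi_timer_ops calls. Purely as an illustrative sketch, assuming only the declarations shown in the deleted header, a consumer could have sampled the stolen-cycle delta like this; the helper name is made up and is not part of this patch or of the removed code:

/* Illustrative sketch only; builds on the vmi_timer_ops declaration above. */
static u64 vmi_stolen_cycles_delta(void)
{
        static u64 last;
        u64 now, delta;

        /* VMI_CYCLES_STOLEN counts cycles the vcpu spent in the 'ready' state. */
        now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
        delta = now - last;
        last = now;
        return delta;
}
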
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fedf32a8c3ec..2c833d8c4141 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
34obj-y := process_$(BITS).o signal.o entry_$(BITS).o 34obj-y := process_$(BITS).o signal.o entry_$(BITS).o
35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_IRQ_WORK) += irq_work.o
38obj-$(CONFIG_X86_VISWS) += visws_quirks.o 39obj-$(CONFIG_X86_VISWS) += visws_quirks.o
39obj-$(CONFIG_X86_32) += probe_roms_32.o 40obj-$(CONFIG_X86_32) += probe_roms_32.o
40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 41obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -44,6 +45,7 @@ obj-y += bootflag.o e820.o
44obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 45obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 46obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
46obj-y += tsc.o io_delay.o rtc.o 47obj-y += tsc.o io_delay.o rtc.o
48obj-y += pci-iommu_table.o
47 49
48obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 50obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
49obj-y += process.o 51obj-y += process.o
@@ -85,15 +87,15 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
85obj-$(CONFIG_KGDB) += kgdb.o 87obj-$(CONFIG_KGDB) += kgdb.o
86obj-$(CONFIG_VM86) += vm86_32.o 88obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 89obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
90obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
88 91
89obj-$(CONFIG_HPET_TIMER) += hpet.o 92obj-$(CONFIG_HPET_TIMER) += hpet.o
90obj-$(CONFIG_APB_TIMER) += apb_timer.o 93obj-$(CONFIG_APB_TIMER) += apb_timer.o
91 94
92obj-$(CONFIG_K8_NB) += k8.o 95obj-$(CONFIG_AMD_NB) += amd_nb.o
93obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 96obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
94obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o 97obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
95 98
96obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
97obj-$(CONFIG_KVM_GUEST) += kvm.o 99obj-$(CONFIG_KVM_GUEST) += kvm.o
98obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 100obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
99obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 101obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
@@ -106,6 +108,7 @@ obj-$(CONFIG_SCx200) += scx200.o
106scx200-y += scx200_32.o 108scx200-y += scx200_32.o
107 109
108obj-$(CONFIG_OLPC) += olpc.o 110obj-$(CONFIG_OLPC) += olpc.o
111obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
109obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o 112obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
110obj-$(CONFIG_X86_MRST) += mrst.o 113obj-$(CONFIG_X86_MRST) += mrst.o
111 114
@@ -122,7 +125,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
122# 64 bit specific files 125# 64 bit specific files
123ifeq ($(CONFIG_X86_64),y) 126ifeq ($(CONFIG_X86_64),y)
124 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o 127 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
125 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
126 obj-$(CONFIG_AUDIT) += audit_64.o 128 obj-$(CONFIG_AUDIT) += audit_64.o
127 129
128 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 130 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index fb16f17e59be..5812404a0d4c 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -13,6 +13,7 @@
13 13
14#include <acpi/processor.h> 14#include <acpi/processor.h>
15#include <asm/acpi.h> 15#include <asm/acpi.h>
16#include <asm/mwait.h>
16 17
17/* 18/*
18 * Initialize bm_flags based on the CPU cache properties 19 * Initialize bm_flags based on the CPU cache properties
@@ -65,16 +66,6 @@ static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
65 66
66static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; 67static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
67 68
68#define MWAIT_SUBSTATE_MASK (0xf)
69#define MWAIT_CSTATE_MASK (0xf)
70#define MWAIT_SUBSTATE_SIZE (4)
71
72#define CPUID_MWAIT_LEAF (5)
73#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
74#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
75
76#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
77
78#define NATIVE_CSTATE_BEYOND_HALT (2) 69#define NATIVE_CSTATE_BEYOND_HALT (2)
79 70
80static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) 71static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c4..a36bb90aef53 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
195 195
196extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 196extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
197extern s32 __smp_locks[], __smp_locks_end[]; 197extern s32 __smp_locks[], __smp_locks_end[];
198static void *text_poke_early(void *addr, const void *opcode, size_t len); 198void *text_poke_early(void *addr, const void *opcode, size_t len);
199 199
200/* Replace instructions with better alternatives for this CPU type. 200/* Replace instructions with better alternatives for this CPU type.
201 This runs before SMP is initialized to avoid SMP problems with 201 This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
522 * instructions. And on the local CPU you need to be protected against NMI or MCE 522 * instructions. And on the local CPU you need to be protected against NMI or MCE
523 * handlers seeing an inconsistent instruction while you patch. 523 * handlers seeing an inconsistent instruction while you patch.
524 */ 524 */
525static void *__init_or_module text_poke_early(void *addr, const void *opcode, 525void *__init_or_module text_poke_early(void *addr, const void *opcode,
526 size_t len) 526 size_t len)
527{ 527{
528 unsigned long flags; 528 unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
637 tpp.len = len; 637 tpp.len = len;
638 atomic_set(&stop_machine_first, 1); 638 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 639 wrote_text = 0;
640 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 640 /* Use __stop_machine() because the caller already got online_cpus. */
641 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
641 return addr; 642 return addr;
642} 643}
643 644
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646
647unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
648
649void __init arch_init_ideal_nop5(void)
650{
651 extern const unsigned char ftrace_test_p6nop[];
652 extern const unsigned char ftrace_test_nop5[];
653 extern const unsigned char ftrace_test_jmp[];
654 int faulted = 0;
655
656 /*
657 * There is no good nop for all x86 archs.
658 * We will default to using the P6_NOP5, but first we
659 * will test to make sure that the nop will actually
660 * work on this CPU. If it faults, we will then
661 * go to a less efficient 5 byte nop. If that fails
662 * we then just use a jmp as our nop. This isn't the most
663 * efficient nop, but we cannot use a multi-part nop
664 * since we would then risk being preempted in the middle
665 * of that nop, and if we enabled tracing then, it might
666 * cause a system crash.
667 *
668 * TODO: check the cpuid to determine the best nop.
669 */
670 asm volatile (
671 "ftrace_test_jmp:"
672 "jmp ftrace_test_p6nop\n"
673 "nop\n"
674 "nop\n"
675 "nop\n" /* 2 byte jmp + 3 bytes */
676 "ftrace_test_p6nop:"
677 P6_NOP5
678 "jmp 1f\n"
679 "ftrace_test_nop5:"
680 ".byte 0x66,0x66,0x66,0x66,0x90\n"
681 "1:"
682 ".section .fixup, \"ax\"\n"
683 "2: movl $1, %0\n"
684 " jmp ftrace_test_nop5\n"
685 "3: movl $2, %0\n"
686 " jmp 1b\n"
687 ".previous\n"
688 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
689 _ASM_EXTABLE(ftrace_test_nop5, 3b)
690 : "=r"(faulted) : "0" (faulted));
691
692 switch (faulted) {
693 case 0:
694 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
695 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
696 break;
697 case 1:
698 pr_info("converting mcount calls to 66 66 66 66 90\n");
699 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
700 break;
701 case 2:
702 pr_info("converting mcount calls to jmp . + 5\n");
703 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
704 break;
705 }
706
707}
708#endif
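
Making text_poke_early() non-static earlier in this hunk and filling ideal_nop5[] are two halves of the same mechanism: once arch_init_ideal_nop5() has picked a nop the CPU accepts, early patching code can stamp it over a 5-byte call or jmp site. A minimal sketch of such a caller, assuming only the symbols shown in this hunk; the function name is hypothetical, and the presumed real consumers are the ftrace/jump-label paths guarded by the same #if above:

/* Hypothetical caller; assumes arch_init_ideal_nop5() has already run. */
static void __init nop_out_call_site(void *addr)
{
        /* Overwrite a 5-byte call/jmp with the CPU's preferred 5-byte nop. */
        text_poke_early(addr, ideal_nop5, IDEAL_NOP_SIZE_5);
}
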
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 679b6450382b..d2fdb0826df2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 5a170cbbbed8..6e11c8134158 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -31,7 +31,7 @@
31#include <asm/iommu.h> 31#include <asm/iommu.h>
32#include <asm/gart.h> 32#include <asm/gart.h>
33#include <asm/x86_init.h> 33#include <asm/x86_init.h>
34 34#include <asm/iommu_table.h>
35/* 35/*
36 * definitions for the ACPI scanning code 36 * definitions for the ACPI scanning code
37 */ 37 */
@@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size)
194 return 1UL << shift; 194 return 1UL << shift;
195} 195}
196 196
197/* Access to l1 and l2 indexed register spaces */
198
199static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
200{
201 u32 val;
202
203 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
204 pci_read_config_dword(iommu->dev, 0xfc, &val);
205 return val;
206}
207
208static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
209{
210 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
211 pci_write_config_dword(iommu->dev, 0xfc, val);
212 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
213}
214
215static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
216{
217 u32 val;
218
219 pci_write_config_dword(iommu->dev, 0xf0, address);
220 pci_read_config_dword(iommu->dev, 0xf4, &val);
221 return val;
222}
223
224static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
225{
226 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
227 pci_write_config_dword(iommu->dev, 0xf4, val);
228}
229
197/**************************************************************************** 230/****************************************************************************
198 * 231 *
199 * AMD IOMMU MMIO register space handling functions 232 * AMD IOMMU MMIO register space handling functions
@@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
619{ 652{
620 int cap_ptr = iommu->cap_ptr; 653 int cap_ptr = iommu->cap_ptr;
621 u32 range, misc; 654 u32 range, misc;
655 int i, j;
622 656
623 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 657 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
624 &iommu->cap); 658 &iommu->cap);
@@ -633,12 +667,29 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
633 MMIO_GET_LD(range)); 667 MMIO_GET_LD(range));
634 iommu->evt_msi_num = MMIO_MSI_NUM(misc); 668 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
635 669
636 if (is_rd890_iommu(iommu->dev)) { 670 if (!is_rd890_iommu(iommu->dev))
637 pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); 671 return;
638 pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); 672
639 pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); 673 /*
640 pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); 674 * Some rd890 systems may not be fully reconfigured by the BIOS, so
641 } 675 * it's necessary for us to store this information so it can be
676 * reprogrammed on resume
677 */
678
679 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
680 &iommu->stored_addr_lo);
681 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
682 &iommu->stored_addr_hi);
683
684 /* Low bit locks writes to configuration space */
685 iommu->stored_addr_lo &= ~1;
686
687 for (i = 0; i < 6; i++)
688 for (j = 0; j < 0x12; j++)
689 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
690
691 for (i = 0; i < 0x83; i++)
692 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
642} 693}
643 694
644/* 695/*
@@ -1127,14 +1178,53 @@ static void iommu_init_flags(struct amd_iommu *iommu)
1127 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 1178 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1128} 1179}
1129 1180
1130static void iommu_apply_quirks(struct amd_iommu *iommu) 1181static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1131{ 1182{
1132 if (is_rd890_iommu(iommu->dev)) { 1183 int i, j;
1133 pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); 1184 u32 ioc_feature_control;
1134 pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); 1185 struct pci_dev *pdev = NULL;
1135 pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); 1186
1136 pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); 1187 /* RD890 BIOSes may not have completely reconfigured the iommu */
1137 } 1188 if (!is_rd890_iommu(iommu->dev))
1189 return;
1190
1191 /*
1192 * First, we need to ensure that the iommu is enabled. This is
1193 * controlled by a register in the northbridge
1194 */
1195 pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
1196
1197 if (!pdev)
1198 return;
1199
1200 /* Select Northbridge indirect register 0x75 and enable writing */
1201 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
1202 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
1203
1204 /* Enable the iommu */
1205 if (!(ioc_feature_control & 0x1))
1206 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1207
1208 pci_dev_put(pdev);
1209
1210 /* Restore the iommu BAR */
1211 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1212 iommu->stored_addr_lo);
1213 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
1214 iommu->stored_addr_hi);
1215
1216 /* Restore the l1 indirect regs for each of the 6 l1s */
1217 for (i = 0; i < 6; i++)
1218 for (j = 0; j < 0x12; j++)
1219 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
1220
1221 /* Restore the l2 indirect regs */
1222 for (i = 0; i < 0x83; i++)
1223 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
1224
1225 /* Lock PCI setup registers */
1226 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1227 iommu->stored_addr_lo | 1);
1138} 1228}
1139 1229
1140/* 1230/*
@@ -1147,7 +1237,6 @@ static void enable_iommus(void)
1147 1237
1148 for_each_iommu(iommu) { 1238 for_each_iommu(iommu) {
1149 iommu_disable(iommu); 1239 iommu_disable(iommu);
1150 iommu_apply_quirks(iommu);
1151 iommu_init_flags(iommu); 1240 iommu_init_flags(iommu);
1152 iommu_set_device_table(iommu); 1241 iommu_set_device_table(iommu);
1153 iommu_enable_command_buffer(iommu); 1242 iommu_enable_command_buffer(iommu);
@@ -1173,6 +1262,11 @@ static void disable_iommus(void)
1173 1262
1174static int amd_iommu_resume(struct sys_device *dev) 1263static int amd_iommu_resume(struct sys_device *dev)
1175{ 1264{
1265 struct amd_iommu *iommu;
1266
1267 for_each_iommu(iommu)
1268 iommu_apply_resume_quirks(iommu);
1269
1176 /* re-load the hardware */ 1270 /* re-load the hardware */
1177 enable_iommus(); 1271 enable_iommus();
1178 1272
@@ -1405,13 +1499,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1405 return 0; 1499 return 0;
1406} 1500}
1407 1501
1408void __init amd_iommu_detect(void) 1502int __init amd_iommu_detect(void)
1409{ 1503{
1410 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 1504 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1411 return; 1505 return -ENODEV;
1412 1506
1413 if (amd_iommu_disabled) 1507 if (amd_iommu_disabled)
1414 return; 1508 return -ENODEV;
1415 1509
1416 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1510 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1417 iommu_detected = 1; 1511 iommu_detected = 1;
@@ -1420,7 +1514,9 @@ void __init amd_iommu_detect(void)
1420 1514
1421 /* Make sure ACS will be enabled */ 1515 /* Make sure ACS will be enabled */
1422 pci_request_acs(); 1516 pci_request_acs();
1517 return 1;
1423 } 1518 }
1519 return -ENODEV;
1424} 1520}
1425 1521
1426/**************************************************************************** 1522/****************************************************************************
@@ -1451,3 +1547,8 @@ static int __init parse_amd_iommu_options(char *str)
1451 1547
1452__setup("amd_iommu_dump", parse_amd_iommu_dump); 1548__setup("amd_iommu_dump", parse_amd_iommu_dump);
1453__setup("amd_iommu=", parse_amd_iommu_options); 1549__setup("amd_iommu=", parse_amd_iommu_options);
1550
1551IOMMU_INIT_FINISH(amd_iommu_detect,
1552 gart_iommu_hole_init,
1553 0,
1554 0);
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/amd_nb.c
index 0f7bc20cfcde..8f6463d8ed0d 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -8,21 +8,19 @@
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <asm/k8.h> 11#include <asm/amd_nb.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15 12
16static u32 *flush_words; 13static u32 *flush_words;
17 14
18struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
21 {} 19 {}
22}; 20};
23EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(k8_nb_ids);
24 22
25struct pci_dev **k8_northbridges; 23struct k8_northbridge_info k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges); 24EXPORT_SYMBOL(k8_northbridges);
27 25
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
@@ -40,36 +38,45 @@ int cache_k8_northbridges(void)
40 int i; 38 int i;
41 struct pci_dev *dev; 39 struct pci_dev *dev;
42 40
43 if (num_k8_northbridges) 41 if (k8_northbridges.num)
44 return 0; 42 return 0;
45 43
46 dev = NULL; 44 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL) 45 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++; 46 k8_northbridges.num++;
47
48 /* some CPU families (e.g. family 0x11) do not support GART */
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
49 52
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), 53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
51 GFP_KERNEL); 54 sizeof(void *), GFP_KERNEL);
52 if (!k8_northbridges) 55 if (!k8_northbridges.nb_misc)
53 return -ENOMEM; 56 return -ENOMEM;
54 57
55 if (!num_k8_northbridges) { 58 if (!k8_northbridges.num) {
56 k8_northbridges[0] = NULL; 59 k8_northbridges.nb_misc[0] = NULL;
57 return 0; 60 return 0;
58 } 61 }
59 62
60 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); 63 if (k8_northbridges.gart_supported) {
61 if (!flush_words) { 64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
62 kfree(k8_northbridges); 65 GFP_KERNEL);
63 return -ENOMEM; 66 if (!flush_words) {
67 kfree(k8_northbridges.nb_misc);
68 return -ENOMEM;
69 }
64 } 70 }
65 71
66 dev = NULL; 72 dev = NULL;
67 i = 0; 73 i = 0;
68 while ((dev = next_k8_northbridge(dev)) != NULL) { 74 while ((dev = next_k8_northbridge(dev)) != NULL) {
69 k8_northbridges[i] = dev; 75 k8_northbridges.nb_misc[i] = dev;
70 pci_read_config_dword(dev, 0x9c, &flush_words[i++]); 76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
71 } 78 }
72 k8_northbridges[i] = NULL; 79 k8_northbridges.nb_misc[i] = NULL;
73 return 0; 80 return 0;
74} 81}
75EXPORT_SYMBOL_GPL(cache_k8_northbridges); 82EXPORT_SYMBOL_GPL(cache_k8_northbridges);
@@ -93,22 +100,25 @@ void k8_flush_garts(void)
93 unsigned long flags; 100 unsigned long flags;
94 static DEFINE_SPINLOCK(gart_lock); 101 static DEFINE_SPINLOCK(gart_lock);
95 102
103 if (!k8_northbridges.gart_supported)
104 return;
105
96 /* Avoid races between AGP and IOMMU. In theory it's not needed 106 /* Avoid races between AGP and IOMMU. In theory it's not needed
97 but I'm not sure if the hardware won't lose flush requests 107 but I'm not sure if the hardware won't lose flush requests
98 when another is pending. This whole thing is so expensive anyways 108 when another is pending. This whole thing is so expensive anyways
99 that it doesn't matter to serialize more. -AK */ 109 that it doesn't matter to serialize more. -AK */
100 spin_lock_irqsave(&gart_lock, flags); 110 spin_lock_irqsave(&gart_lock, flags);
101 flushed = 0; 111 flushed = 0;
102 for (i = 0; i < num_k8_northbridges; i++) { 112 for (i = 0; i < k8_northbridges.num; i++) {
103 pci_write_config_dword(k8_northbridges[i], 0x9c, 113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
104 flush_words[i]|1); 114 flush_words[i]|1);
105 flushed++; 115 flushed++;
106 } 116 }
107 for (i = 0; i < num_k8_northbridges; i++) { 117 for (i = 0; i < k8_northbridges.num; i++) {
108 u32 w; 118 u32 w;
109 /* Make sure the hardware actually executed the flush*/ 119 /* Make sure the hardware actually executed the flush*/
110 for (;;) { 120 for (;;) {
111 pci_read_config_dword(k8_northbridges[i], 121 pci_read_config_dword(k8_northbridges.nb_misc[i],
112 0x9c, &w); 122 0x9c, &w);
113 if (!(w & 1)) 123 if (!(w & 1))
114 break; 124 break;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 8dd77800ff5d..92543c73cf8e 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -231,34 +231,6 @@ static void apbt_restart_clocksource(struct clocksource *cs)
231 apbt_start_counter(phy_cs_timer_id); 231 apbt_start_counter(phy_cs_timer_id);
232} 232}
233 233
234/* Setup IRQ routing via IOAPIC */
235#ifdef CONFIG_SMP
236static void apbt_setup_irq(struct apbt_dev *adev)
237{
238 struct irq_chip *chip;
239 struct irq_desc *desc;
240
241 /* timer0 irq has been setup early */
242 if (adev->irq == 0)
243 return;
244 desc = irq_to_desc(adev->irq);
245 chip = get_irq_chip(adev->irq);
246 disable_irq(adev->irq);
247 desc->status |= IRQ_MOVE_PCNTXT;
248 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
249 /* APB timer irqs are set up as mp_irqs, timer is edge triggerred */
250 set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
251 enable_irq(adev->irq);
252 if (system_state == SYSTEM_BOOTING)
253 if (request_irq(adev->irq, apbt_interrupt_handler,
254 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
255 adev->name, adev)) {
256 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
257 adev->num);
258 }
259}
260#endif
261
262static void apbt_enable_int(int n) 234static void apbt_enable_int(int n)
263{ 235{
264 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); 236 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
@@ -334,6 +306,27 @@ static int __init apbt_clockevent_register(void)
334} 306}
335 307
336#ifdef CONFIG_SMP 308#ifdef CONFIG_SMP
309
310static void apbt_setup_irq(struct apbt_dev *adev)
311{
312 /* timer0 irq has been setup early */
313 if (adev->irq == 0)
314 return;
315
316 if (system_state == SYSTEM_BOOTING) {
317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
318 /* APB timer irqs are set up as mp_irqs, timer is edge type */
319 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
320 if (request_irq(adev->irq, apbt_interrupt_handler,
321 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
322 adev->name, adev)) {
323 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
324 adev->num);
325 }
326 } else
327 enable_irq(adev->irq);
328}
329
337/* Should be called with per cpu */ 330/* Should be called with per cpu */
338void apbt_setup_secondary_clock(void) 331void apbt_setup_secondary_clock(void)
339{ 332{
@@ -343,7 +336,7 @@ void apbt_setup_secondary_clock(void)
343 336
344 /* Don't register boot CPU clockevent */ 337 /* Don't register boot CPU clockevent */
345 cpu = smp_processor_id(); 338 cpu = smp_processor_id();
346 if (cpu == boot_cpu_id) 339 if (!cpu)
347 return; 340 return;
348 /* 341 /*
349 * We need to calculate the scaled math multiplication factor for 342 * We need to calculate the scaled math multiplication factor for
@@ -389,16 +382,17 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
389 382
390 switch (action & 0xf) { 383 switch (action & 0xf) {
391 case CPU_DEAD: 384 case CPU_DEAD:
385 disable_irq(adev->irq);
392 apbt_disable_int(cpu); 386 apbt_disable_int(cpu);
393 if (system_state == SYSTEM_RUNNING) 387 if (system_state == SYSTEM_RUNNING) {
394 pr_debug("skipping APBT CPU %lu offline\n", cpu); 388 pr_debug("skipping APBT CPU %lu offline\n", cpu);
395 else if (adev) { 389 } else if (adev) {
396 pr_debug("APBT clockevent for cpu %lu offline\n", cpu); 390 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
397 free_irq(adev->irq, adev); 391 free_irq(adev->irq, adev);
398 } 392 }
399 break; 393 break;
400 default: 394 default:
401 pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); 395 pr_debug("APBT notified %lu, no action\n", action);
402 } 396 }
403 return NOTIFY_OK; 397 return NOTIFY_OK;
404} 398}
@@ -552,7 +546,7 @@ bad_count:
552 pr_debug("APB CS going back %lx:%lx:%lx ", 546 pr_debug("APB CS going back %lx:%lx:%lx ",
553 t2, last_read, t2 - last_read); 547 t2, last_read, t2 - last_read);
554bad_count_x3: 548bad_count_x3:
555 pr_debug(KERN_INFO "tripple check enforced\n"); 549 pr_debug("triple check enforced\n");
556 t0 = apbt_readl(phy_cs_timer_id, 550 t0 = apbt_readl(phy_cs_timer_id,
557 APBTMR_N_CURRENT_VALUE); 551 APBTMR_N_CURRENT_VALUE);
558 udelay(1); 552 udelay(1);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index a2e0caf26e17..b3a16e8f0703 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -27,7 +27,7 @@
27#include <asm/gart.h> 27#include <asm/gart.h>
28#include <asm/pci-direct.h> 28#include <asm/pci-direct.h>
29#include <asm/dma.h> 29#include <asm/dma.h>
30#include <asm/k8.h> 30#include <asm/amd_nb.h>
31#include <asm/x86_init.h> 31#include <asm/x86_init.h>
32 32
33int gart_iommu_aperture; 33int gart_iommu_aperture;
@@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void)
307 continue; 307 continue;
308 308
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
310 aper_enabled = ctl & AMD64_GARTEN; 310 aper_enabled = ctl & GARTEN;
311 aper_order = (ctl >> 1) & 7; 311 aper_order = (ctl >> 1) & 7;
312 aper_size = (32 * 1024 * 1024) << aper_order; 312 aper_size = (32 * 1024 * 1024) << aper_order;
313 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; 313 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
@@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void)
362 continue; 362 continue;
363 363
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
365 ctl &= ~AMD64_GARTEN; 365 ctl &= ~GARTEN;
366 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 366 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
367 } 367 }
368 } 368 }
@@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void)
371 371
372static int __initdata printed_gart_size_msg; 372static int __initdata printed_gart_size_msg;
373 373
374void __init gart_iommu_hole_init(void) 374int __init gart_iommu_hole_init(void)
375{ 375{
376 u32 agp_aper_base = 0, agp_aper_order = 0; 376 u32 agp_aper_base = 0, agp_aper_order = 0;
377 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; 377 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
@@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void)
381 381
382 if (gart_iommu_aperture_disabled || !fix_aperture || 382 if (gart_iommu_aperture_disabled || !fix_aperture ||
383 !early_pci_allowed()) 383 !early_pci_allowed())
384 return; 384 return -ENODEV;
385 385
386 printk(KERN_INFO "Checking aperture...\n"); 386 printk(KERN_INFO "Checking aperture...\n");
387 387
@@ -463,8 +463,9 @@ out:
463 unsigned long n = (32 * 1024 * 1024) << last_aper_order; 463 unsigned long n = (32 * 1024 * 1024) << last_aper_order;
464 464
465 insert_aperture_resource((u32)last_aper_base, n); 465 insert_aperture_resource((u32)last_aper_base, n);
466 return 1;
466 } 467 }
467 return; 468 return 0;
468 } 469 }
469 470
470 if (!fallback_aper_force) { 471 if (!fallback_aper_force) {
@@ -500,13 +501,18 @@ out:
500 panic("Not enough memory for aperture"); 501 panic("Not enough memory for aperture");
501 } 502 }
502 } else { 503 } else {
503 return; 504 return 0;
504 } 505 }
505 506
506 /* Fix up the north bridges */ 507 /* Fix up the north bridges */
507 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 508 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
508 int bus; 509 int bus, dev_base, dev_limit;
509 int dev_base, dev_limit; 510
511 /*
512 * Don't enable translation yet but enable GART IO and CPU
513 * accesses and set DISTLBWALKPRB since GART table memory is UC.
514 */
515 u32 ctl = DISTLBWALKPRB | aper_order << 1;
510 516
511 bus = bus_dev_ranges[i].bus; 517 bus = bus_dev_ranges[i].bus;
512 dev_base = bus_dev_ranges[i].dev_base; 518 dev_base = bus_dev_ranges[i].dev_base;
@@ -515,13 +521,12 @@ out:
515 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 521 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
516 continue; 522 continue;
517 523
518 /* Don't enable translation yet. That is done later. 524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
519 Assume this BIOS didn't initialise the GART so
520 just overwrite all previous bits */
521 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
522 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); 525 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
523 } 526 }
524 } 527 }
525 528
526 set_up_gart_resume(aper_order, aper_alloc); 529 set_up_gart_resume(aper_order, aper_alloc);
530
531 return 1;
527} 532}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e3b534cda49a..850657d1b0ed 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -52,6 +52,7 @@
52#include <asm/mce.h> 52#include <asm/mce.h>
53#include <asm/kvm_para.h> 53#include <asm/kvm_para.h>
54#include <asm/tsc.h> 54#include <asm/tsc.h>
55#include <asm/atomic.h>
55 56
56unsigned int num_processors; 57unsigned int num_processors;
57 58
@@ -370,38 +371,87 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
370} 371}
371 372
372/* 373/*
373 * Setup extended LVT, AMD specific (K8, family 10h) 374 * Setup extended LVT, AMD specific
374 * 375 *
375 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and 376 * Software should use the LVT offsets the BIOS provides. The offsets
376 * MCE interrupts are supported. Thus MCE offset must be set to 0. 377 * are determined by the subsystems using it like those for MCE
378 * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
379 * are supported. Beginning with family 10h at least 4 offsets are
380 * available.
377 * 381 *
378 * If mask=1, the LVT entry does not generate interrupts while mask=0 382 * Since the offsets must be consistent for all cores, we keep track
379 * enables the vector. See also the BKDGs. 383 * of the LVT offsets in software and reserve the offset for the same
384 * vector also to be used on other cores. An offset is freed by
385 * setting the entry to APIC_EILVT_MASKED.
386 *
387 * If the BIOS is right, there should be no conflicts. Otherwise a
388 * "[Firmware Bug]: ..." error message is generated. However, if
389 * software does not properly determine the offsets, it is not
390 * necessarily a BIOS bug.
380 */ 391 */
381 392
382#define APIC_EILVT_LVTOFF_MCE 0 393static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
383#define APIC_EILVT_LVTOFF_IBS 1
384 394
385static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) 395static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
386{ 396{
387 unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0); 397 return (old & APIC_EILVT_MASKED)
388 unsigned int v = (mask << 16) | (msg_type << 8) | vector; 398 || (new == APIC_EILVT_MASKED)
389 399 || ((new & ~APIC_EILVT_MASKED) == old);
390 apic_write(reg, v);
391} 400}
392 401
393u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) 402static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
394{ 403{
395 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); 404 unsigned int rsvd; /* 0: uninitialized */
396 return APIC_EILVT_LVTOFF_MCE; 405
406 if (offset >= APIC_EILVT_NR_MAX)
407 return ~0;
408
409 rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED;
410 do {
411 if (rsvd &&
412 !eilvt_entry_is_changeable(rsvd, new))
413 /* may not change if vectors are different */
414 return rsvd;
415 rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
416 } while (rsvd != new);
417
418 return new;
397} 419}
398 420
399u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) 421/*
422 * If mask=1, the LVT entry does not generate interrupts while mask=0
423 * enables the vector. See also the BKDGs.
424 */
425
426int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
400{ 427{
401 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); 428 unsigned long reg = APIC_EILVTn(offset);
402 return APIC_EILVT_LVTOFF_IBS; 429 unsigned int new, old, reserved;
430
431 new = (mask << 16) | (msg_type << 8) | vector;
432 old = apic_read(reg);
433 reserved = reserve_eilvt_offset(offset, new);
434
435 if (reserved != new) {
436 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
437 "vector 0x%x was already reserved by another core, "
438 "APIC%lX=0x%x\n",
439 smp_processor_id(), new, reserved, reg, old);
440 return -EINVAL;
441 }
442
443 if (!eilvt_entry_is_changeable(old, new)) {
444 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
445 "register already in use, APIC%lX=0x%x\n",
446 smp_processor_id(), new, reg, old);
447 return -EBUSY;
448 }
449
450 apic_write(reg, new);
451
452 return 0;
403} 453}
404EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); 454EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
405 455
406/* 456/*
407 * Program the next event, relative to now 457 * Program the next event, relative to now
@@ -1665,10 +1715,7 @@ int __init APIC_init_uniprocessor(void)
1665 } 1715 }
1666#endif 1716#endif
1667 1717
1668#ifndef CONFIG_SMP
1669 enable_IR_x2apic();
1670 default_setup_apic_routing(); 1718 default_setup_apic_routing();
1671#endif
1672 1719
1673 verify_local_APIC(); 1720 verify_local_APIC();
1674 connect_bsp_APIC(); 1721 connect_bsp_APIC();
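
With the rework above, setup_APIC_eilvt() takes the LVT offset directly, reserves it consistently across cores, and returns 0 on success, -EINVAL when another core already reserved the offset for a different vector, or -EBUSY when the local register is already in use, so callers now have to check the result. A hedged sketch of a caller, using only the signature shown in this diff; the function name is illustrative, and how a subsystem obtains its offset, vector, and message type is outside this hunk:

/* Illustrative caller; offset, vector and msg_type come from the subsystem. */
static int example_setup_ext_lvt(u8 offset, u8 vector, u8 msg_type)
{
        int err = setup_APIC_eilvt(offset, vector, msg_type, 0 /* unmasked */);

        if (err)
                pr_err("extended LVT offset %u rejected for vector 0x%x: %d\n",
                       offset, vector, err);
        return err;
}
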
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c5b8f3dddb5..8ae808d110f4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -131,13 +131,9 @@ struct irq_pin_list {
131 struct irq_pin_list *next; 131 struct irq_pin_list *next;
132}; 132};
133 133
134static struct irq_pin_list *get_one_free_irq_2_pin(int node) 134static struct irq_pin_list *alloc_irq_pin_list(int node)
135{ 135{
136 struct irq_pin_list *pin; 136 return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
137
138 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
139
140 return pin;
141} 137}
142 138
143/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 139/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -150,10 +146,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS];
150int __init arch_early_irq_init(void) 146int __init arch_early_irq_init(void)
151{ 147{
152 struct irq_cfg *cfg; 148 struct irq_cfg *cfg;
153 struct irq_desc *desc; 149 int count, node, i;
154 int count;
155 int node;
156 int i;
157 150
158 if (!legacy_pic->nr_legacy_irqs) { 151 if (!legacy_pic->nr_legacy_irqs) {
159 nr_irqs_gsi = 0; 152 nr_irqs_gsi = 0;
@@ -162,13 +155,15 @@ int __init arch_early_irq_init(void)
162 155
163 cfg = irq_cfgx; 156 cfg = irq_cfgx;
164 count = ARRAY_SIZE(irq_cfgx); 157 count = ARRAY_SIZE(irq_cfgx);
165 node= cpu_to_node(boot_cpu_id); 158 node = cpu_to_node(0);
159
160 /* Make sure the legacy interrupts are marked in the bitmap */
161 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
166 162
167 for (i = 0; i < count; i++) { 163 for (i = 0; i < count; i++) {
168 desc = irq_to_desc(i); 164 set_irq_chip_data(i, &cfg[i]);
169 desc->chip_data = &cfg[i]; 165 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
170 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); 166 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
171 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
172 /* 167 /*
173 * For legacy IRQ's, start with assigning irq0 to irq15 to 168 * For legacy IRQ's, start with assigning irq0 to irq15 to
174 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. 169 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
@@ -183,170 +178,88 @@ int __init arch_early_irq_init(void)
183} 178}
184 179
185#ifdef CONFIG_SPARSE_IRQ 180#ifdef CONFIG_SPARSE_IRQ
186struct irq_cfg *irq_cfg(unsigned int irq) 181static struct irq_cfg *irq_cfg(unsigned int irq)
187{ 182{
188 struct irq_cfg *cfg = NULL; 183 return get_irq_chip_data(irq);
189 struct irq_desc *desc;
190
191 desc = irq_to_desc(irq);
192 if (desc)
193 cfg = desc->chip_data;
194
195 return cfg;
196} 184}
197 185
198static struct irq_cfg *get_one_free_irq_cfg(int node) 186static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
199{ 187{
200 struct irq_cfg *cfg; 188 struct irq_cfg *cfg;
201 189
202 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 190 cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
203 if (cfg) { 191 if (!cfg)
204 if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { 192 return NULL;
205 kfree(cfg); 193 if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
206 cfg = NULL; 194 goto out_cfg;
207 } else if (!zalloc_cpumask_var_node(&cfg->old_domain, 195 if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
208 GFP_ATOMIC, node)) { 196 goto out_domain;
209 free_cpumask_var(cfg->domain);
210 kfree(cfg);
211 cfg = NULL;
212 }
213 }
214
215 return cfg; 197 return cfg;
198out_domain:
199 free_cpumask_var(cfg->domain);
200out_cfg:
201 kfree(cfg);
202 return NULL;
216} 203}
217 204
218int arch_init_chip_data(struct irq_desc *desc, int node) 205static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
219{
220 struct irq_cfg *cfg;
221
222 cfg = desc->chip_data;
223 if (!cfg) {
224 desc->chip_data = get_one_free_irq_cfg(node);
225 if (!desc->chip_data) {
226 printk(KERN_ERR "can not alloc irq_cfg\n");
227 BUG_ON(1);
228 }
229 }
230
231 return 0;
232}
233
234/* for move_irq_desc */
235static void
236init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
237{ 206{
238 struct irq_pin_list *old_entry, *head, *tail, *entry; 207 if (!cfg)
239
240 cfg->irq_2_pin = NULL;
241 old_entry = old_cfg->irq_2_pin;
242 if (!old_entry)
243 return;
244
245 entry = get_one_free_irq_2_pin(node);
246 if (!entry)
247 return; 208 return;
209 set_irq_chip_data(at, NULL);
210 free_cpumask_var(cfg->domain);
211 free_cpumask_var(cfg->old_domain);
212 kfree(cfg);
213}
248 214
249 entry->apic = old_entry->apic; 215#else
250 entry->pin = old_entry->pin;
251 head = entry;
252 tail = entry;
253 old_entry = old_entry->next;
254 while (old_entry) {
255 entry = get_one_free_irq_2_pin(node);
256 if (!entry) {
257 entry = head;
258 while (entry) {
259 head = entry->next;
260 kfree(entry);
261 entry = head;
262 }
263 /* still use the old one */
264 return;
265 }
266 entry->apic = old_entry->apic;
267 entry->pin = old_entry->pin;
268 tail->next = entry;
269 tail = entry;
270 old_entry = old_entry->next;
271 }
272 216
273 tail->next = NULL; 217struct irq_cfg *irq_cfg(unsigned int irq)
274 cfg->irq_2_pin = head; 218{
219 return irq < nr_irqs ? irq_cfgx + irq : NULL;
275} 220}
276 221
277static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) 222static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
278{ 223{
279 struct irq_pin_list *entry, *next; 224 return irq_cfgx + irq;
280 225}
281 if (old_cfg->irq_2_pin == cfg->irq_2_pin)
282 return;
283 226
284 entry = old_cfg->irq_2_pin; 227static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
285 228
286 while (entry) { 229#endif
287 next = entry->next;
288 kfree(entry);
289 entry = next;
290 }
291 old_cfg->irq_2_pin = NULL;
292}
293 230
294void arch_init_copy_chip_data(struct irq_desc *old_desc, 231static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
295 struct irq_desc *desc, int node)
296{ 232{
233 int res = irq_alloc_desc_at(at, node);
297 struct irq_cfg *cfg; 234 struct irq_cfg *cfg;
298 struct irq_cfg *old_cfg;
299
300 cfg = get_one_free_irq_cfg(node);
301 235
302 if (!cfg) 236 if (res < 0) {
303 return; 237 if (res != -EEXIST)
304 238 return NULL;
305 desc->chip_data = cfg; 239 cfg = get_irq_chip_data(at);
306 240 if (cfg)
307 old_cfg = old_desc->chip_data; 241 return cfg;
308 242 }
309 cfg->vector = old_cfg->vector;
310 cfg->move_in_progress = old_cfg->move_in_progress;
311 cpumask_copy(cfg->domain, old_cfg->domain);
312 cpumask_copy(cfg->old_domain, old_cfg->old_domain);
313
314 init_copy_irq_2_pin(old_cfg, cfg, node);
315}
316 243
317static void free_irq_cfg(struct irq_cfg *cfg) 244 cfg = alloc_irq_cfg(at, node);
318{ 245 if (cfg)
319 free_cpumask_var(cfg->domain); 246 set_irq_chip_data(at, cfg);
320 free_cpumask_var(cfg->old_domain); 247 else
321 kfree(cfg); 248 irq_free_desc(at);
249 return cfg;
322} 250}
323 251
324void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) 252static int alloc_irq_from(unsigned int from, int node)
325{ 253{
326 struct irq_cfg *old_cfg, *cfg; 254 return irq_alloc_desc_from(from, node);
327
328 old_cfg = old_desc->chip_data;
329 cfg = desc->chip_data;
330
331 if (old_cfg == cfg)
332 return;
333
334 if (old_cfg) {
335 free_irq_2_pin(old_cfg, cfg);
336 free_irq_cfg(old_cfg);
337 old_desc->chip_data = NULL;
338 }
339} 255}
340/* end for move_irq_desc */
341 256
342#else 257static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
343struct irq_cfg *irq_cfg(unsigned int irq)
344{ 258{
345 return irq < nr_irqs ? irq_cfgx + irq : NULL; 259 free_irq_cfg(at, cfg);
260 irq_free_desc(at);
346} 261}
347 262
348#endif
349
350struct io_apic { 263struct io_apic {
351 unsigned int index; 264 unsigned int index;
352 unsigned int unused[3]; 265 unsigned int unused[3];
@@ -451,7 +364,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
451 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 364 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
452} 365}
453 366
454void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 367static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
455{ 368{
456 unsigned long flags; 369 unsigned long flags;
457 raw_spin_lock_irqsave(&ioapic_lock, flags); 370 raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -481,7 +394,7 @@ static void ioapic_mask_entry(int apic, int pin)
481 * fast in the common case, and fast for shared ISA-space IRQs. 394 * fast in the common case, and fast for shared ISA-space IRQs.
482 */ 395 */
483static int 396static int
484add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) 397__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
485{ 398{
486 struct irq_pin_list **last, *entry; 399 struct irq_pin_list **last, *entry;
487 400
@@ -493,7 +406,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
493 last = &entry->next; 406 last = &entry->next;
494 } 407 }
495 408
496 entry = get_one_free_irq_2_pin(node); 409 entry = alloc_irq_pin_list(node);
497 if (!entry) { 410 if (!entry) {
498 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", 411 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
499 node, apic, pin); 412 node, apic, pin);
@@ -508,7 +421,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
508 421
509static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) 422static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
510{ 423{
511 if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) 424 if (__add_pin_to_irq_node(cfg, node, apic, pin))
512 panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); 425 panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
513} 426}
514 427
@@ -571,11 +484,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
571 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 484 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
572} 485}
573 486
574static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
575{
576 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
577}
578
579static void io_apic_sync(struct irq_pin_list *entry) 487static void io_apic_sync(struct irq_pin_list *entry)
580{ 488{
581 /* 489 /*
@@ -587,44 +495,37 @@ static void io_apic_sync(struct irq_pin_list *entry)
587 readl(&io_apic->data); 495 readl(&io_apic->data);
588} 496}
589 497
590static void __mask_IO_APIC_irq(struct irq_cfg *cfg) 498static void mask_ioapic(struct irq_cfg *cfg)
591{ 499{
500 unsigned long flags;
501
502 raw_spin_lock_irqsave(&ioapic_lock, flags);
592 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 503 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
504 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
593} 505}
594 506
595static void mask_IO_APIC_irq_desc(struct irq_desc *desc) 507static void mask_ioapic_irq(struct irq_data *data)
596{ 508{
597 struct irq_cfg *cfg = desc->chip_data; 509 mask_ioapic(data->chip_data);
598 unsigned long flags; 510}
599
600 BUG_ON(!cfg);
601 511
602 raw_spin_lock_irqsave(&ioapic_lock, flags); 512static void __unmask_ioapic(struct irq_cfg *cfg)
603 __mask_IO_APIC_irq(cfg); 513{
604 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 514 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
605} 515}
606 516
607static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) 517static void unmask_ioapic(struct irq_cfg *cfg)
608{ 518{
609 struct irq_cfg *cfg = desc->chip_data;
610 unsigned long flags; 519 unsigned long flags;
611 520
612 raw_spin_lock_irqsave(&ioapic_lock, flags); 521 raw_spin_lock_irqsave(&ioapic_lock, flags);
613 __unmask_IO_APIC_irq(cfg); 522 __unmask_ioapic(cfg);
614 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 523 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
615} 524}
616 525
617static void mask_IO_APIC_irq(unsigned int irq) 526static void unmask_ioapic_irq(struct irq_data *data)
618{ 527{
619 struct irq_desc *desc = irq_to_desc(irq); 528 unmask_ioapic(data->chip_data);
620
621 mask_IO_APIC_irq_desc(desc);
622}
623static void unmask_IO_APIC_irq(unsigned int irq)
624{
625 struct irq_desc *desc = irq_to_desc(irq);
626
627 unmask_IO_APIC_irq_desc(desc);
628} 529}
629 530
630static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 531static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -694,14 +595,14 @@ struct IO_APIC_route_entry **alloc_ioapic_entries(void)
694 struct IO_APIC_route_entry **ioapic_entries; 595 struct IO_APIC_route_entry **ioapic_entries;
695 596
696 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, 597 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
697 GFP_ATOMIC); 598 GFP_KERNEL);
698 if (!ioapic_entries) 599 if (!ioapic_entries)
699 return 0; 600 return 0;
700 601
701 for (apic = 0; apic < nr_ioapics; apic++) { 602 for (apic = 0; apic < nr_ioapics; apic++) {
702 ioapic_entries[apic] = 603 ioapic_entries[apic] =
703 kzalloc(sizeof(struct IO_APIC_route_entry) * 604 kzalloc(sizeof(struct IO_APIC_route_entry) *
704 nr_ioapic_registers[apic], GFP_ATOMIC); 605 nr_ioapic_registers[apic], GFP_KERNEL);
705 if (!ioapic_entries[apic]) 606 if (!ioapic_entries[apic])
706 goto nomem; 607 goto nomem;
707 } 608 }
@@ -1259,7 +1160,6 @@ void __setup_vector_irq(int cpu)
1259 /* Initialize vector_irq on a new cpu */ 1160 /* Initialize vector_irq on a new cpu */
1260 int irq, vector; 1161 int irq, vector;
1261 struct irq_cfg *cfg; 1162 struct irq_cfg *cfg;
1262 struct irq_desc *desc;
1263 1163
1264 /* 1164 /*
1265 * vector_lock will make sure that we don't run into irq vector 1165 * vector_lock will make sure that we don't run into irq vector
@@ -1268,9 +1168,10 @@ void __setup_vector_irq(int cpu)
1268 */ 1168 */
1269 raw_spin_lock(&vector_lock); 1169 raw_spin_lock(&vector_lock);
1270 /* Mark the inuse vectors */ 1170 /* Mark the inuse vectors */
1271 for_each_irq_desc(irq, desc) { 1171 for_each_active_irq(irq) {
1272 cfg = desc->chip_data; 1172 cfg = get_irq_chip_data(irq);
1273 1173 if (!cfg)
1174 continue;
1274 /* 1175 /*
1275 * If it is a legacy IRQ handled by the legacy PIC, this cpu 1176 * If it is a legacy IRQ handled by the legacy PIC, this cpu
1276 * will be part of the irq_cfg's domain. 1177 * will be part of the irq_cfg's domain.
@@ -1327,17 +1228,17 @@ static inline int IO_APIC_irq_trigger(int irq)
1327} 1228}
1328#endif 1229#endif
1329 1230
1330static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) 1231static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
1331{ 1232{
1332 1233
1333 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1234 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1334 trigger == IOAPIC_LEVEL) 1235 trigger == IOAPIC_LEVEL)
1335 desc->status |= IRQ_LEVEL; 1236 irq_set_status_flags(irq, IRQ_LEVEL);
1336 else 1237 else
1337 desc->status &= ~IRQ_LEVEL; 1238 irq_clear_status_flags(irq, IRQ_LEVEL);
1338 1239
1339 if (irq_remapped(irq)) { 1240 if (irq_remapped(get_irq_chip_data(irq))) {
1340 desc->status |= IRQ_MOVE_PCNTXT; 1241 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1341 if (trigger) 1242 if (trigger)
1342 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1243 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1343 handle_fasteoi_irq, 1244 handle_fasteoi_irq,
@@ -1358,10 +1259,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1358 handle_edge_irq, "edge"); 1259 handle_edge_irq, "edge");
1359} 1260}
1360 1261
1361int setup_ioapic_entry(int apic_id, int irq, 1262static int setup_ioapic_entry(int apic_id, int irq,
1362 struct IO_APIC_route_entry *entry, 1263 struct IO_APIC_route_entry *entry,
1363 unsigned int destination, int trigger, 1264 unsigned int destination, int trigger,
1364 int polarity, int vector, int pin) 1265 int polarity, int vector, int pin)
1365{ 1266{
1366 /* 1267 /*
1367 * add it to the IO-APIC irq-routing table: 1268 * add it to the IO-APIC irq-routing table:
@@ -1382,21 +1283,7 @@ int setup_ioapic_entry(int apic_id, int irq,
1382 if (index < 0) 1283 if (index < 0)
1383 panic("Failed to allocate IRTE for ioapic %d\n", apic_id); 1284 panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1384 1285
1385 memset(&irte, 0, sizeof(irte)); 1286 prepare_irte(&irte, vector, destination);
1386
1387 irte.present = 1;
1388 irte.dst_mode = apic->irq_dest_mode;
1389 /*
1390 * Trigger mode in the IRTE will always be edge, and the
1391 * actual level or edge trigger will be setup in the IO-APIC
1392 * RTE. This will help simplify level triggered irq migration.
1393 * For more details, see the comments above explainig IO-APIC
1394 * irq migration in the presence of interrupt-remapping.
1395 */
1396 irte.trigger_mode = 0;
1397 irte.dlvry_mode = apic->irq_delivery_mode;
1398 irte.vector = vector;
1399 irte.dest_id = IRTE_DEST(destination);
1400 1287
1401 /* Set source-id of interrupt request */ 1288 /* Set source-id of interrupt request */
1402 set_ioapic_sid(&irte, apic_id); 1289 set_ioapic_sid(&irte, apic_id);
@@ -1431,18 +1318,14 @@ int setup_ioapic_entry(int apic_id, int irq,
1431 return 0; 1318 return 0;
1432} 1319}
1433 1320
1434static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, 1321static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
1435 int trigger, int polarity) 1322 struct irq_cfg *cfg, int trigger, int polarity)
1436{ 1323{
1437 struct irq_cfg *cfg;
1438 struct IO_APIC_route_entry entry; 1324 struct IO_APIC_route_entry entry;
1439 unsigned int dest; 1325 unsigned int dest;
1440 1326
1441 if (!IO_APIC_IRQ(irq)) 1327 if (!IO_APIC_IRQ(irq))
1442 return; 1328 return;
1443
1444 cfg = desc->chip_data;
1445
1446 /* 1329 /*
1447 * For legacy irqs, cfg->domain starts with cpu 0 for legacy 1330 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
1448 * controllers like 8259. Now that IO-APIC can handle this irq, update 1331 * controllers like 8259. Now that IO-APIC can handle this irq, update
@@ -1471,9 +1354,9 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1471 return; 1354 return;
1472 } 1355 }
1473 1356
1474 ioapic_register_intr(irq, desc, trigger); 1357 ioapic_register_intr(irq, trigger);
1475 if (irq < legacy_pic->nr_legacy_irqs) 1358 if (irq < legacy_pic->nr_legacy_irqs)
1476 legacy_pic->chip->mask(irq); 1359 legacy_pic->mask(irq);
1477 1360
1478 ioapic_write_entry(apic_id, pin, entry); 1361 ioapic_write_entry(apic_id, pin, entry);
1479} 1362}
@@ -1484,11 +1367,9 @@ static struct {
1484 1367
1485static void __init setup_IO_APIC_irqs(void) 1368static void __init setup_IO_APIC_irqs(void)
1486{ 1369{
1487 int apic_id, pin, idx, irq; 1370 int apic_id, pin, idx, irq, notcon = 0;
1488 int notcon = 0; 1371 int node = cpu_to_node(0);
1489 struct irq_desc *desc;
1490 struct irq_cfg *cfg; 1372 struct irq_cfg *cfg;
1491 int node = cpu_to_node(boot_cpu_id);
1492 1373
1493 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1374 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1494 1375
@@ -1525,19 +1406,17 @@ static void __init setup_IO_APIC_irqs(void)
1525 apic->multi_timer_check(apic_id, irq)) 1406 apic->multi_timer_check(apic_id, irq))
1526 continue; 1407 continue;
1527 1408
1528 desc = irq_to_desc_alloc_node(irq, node); 1409 cfg = alloc_irq_and_cfg_at(irq, node);
1529 if (!desc) { 1410 if (!cfg)
1530 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1531 continue; 1411 continue;
1532 } 1412
1533 cfg = desc->chip_data;
1534 add_pin_to_irq_node(cfg, node, apic_id, pin); 1413 add_pin_to_irq_node(cfg, node, apic_id, pin);
1535 /* 1414 /*
1536 * don't mark it in pin_programmed, so later acpi could 1415 * don't mark it in pin_programmed, so later acpi could
1537 * set it correctly when irq < 16 1416 * set it correctly when irq < 16
1538 */ 1417 */
1539 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1418 setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
1540 irq_trigger(idx), irq_polarity(idx)); 1419 irq_polarity(idx));
1541 } 1420 }
1542 1421
1543 if (notcon) 1422 if (notcon)
@@ -1552,9 +1431,7 @@ static void __init setup_IO_APIC_irqs(void)
1552 */ 1431 */
1553void setup_IO_APIC_irq_extra(u32 gsi) 1432void setup_IO_APIC_irq_extra(u32 gsi)
1554{ 1433{
1555 int apic_id = 0, pin, idx, irq; 1434 int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
1556 int node = cpu_to_node(boot_cpu_id);
1557 struct irq_desc *desc;
1558 struct irq_cfg *cfg; 1435 struct irq_cfg *cfg;
1559 1436
1560 /* 1437 /*
@@ -1570,18 +1447,15 @@ void setup_IO_APIC_irq_extra(u32 gsi)
1570 return; 1447 return;
1571 1448
1572 irq = pin_2_irq(idx, apic_id, pin); 1449 irq = pin_2_irq(idx, apic_id, pin);
1573#ifdef CONFIG_SPARSE_IRQ 1450
1574 desc = irq_to_desc(irq); 1451 /* Only handle the non legacy irqs on secondary ioapics */
1575 if (desc) 1452 if (apic_id == 0 || irq < NR_IRQS_LEGACY)
1576 return; 1453 return;
1577#endif 1454
1578 desc = irq_to_desc_alloc_node(irq, node); 1455 cfg = alloc_irq_and_cfg_at(irq, node);
1579 if (!desc) { 1456 if (!cfg)
1580 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1581 return; 1457 return;
1582 }
1583 1458
1584 cfg = desc->chip_data;
1585 add_pin_to_irq_node(cfg, node, apic_id, pin); 1459 add_pin_to_irq_node(cfg, node, apic_id, pin);
1586 1460
1587 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { 1461 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
@@ -1591,7 +1465,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
1591 } 1465 }
1592 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); 1466 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1593 1467
1594 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1468 setup_ioapic_irq(apic_id, pin, irq, cfg,
1595 irq_trigger(idx), irq_polarity(idx)); 1469 irq_trigger(idx), irq_polarity(idx));
1596} 1470}
1597 1471
@@ -1642,7 +1516,6 @@ __apicdebuginit(void) print_IO_APIC(void)
1642 union IO_APIC_reg_03 reg_03; 1516 union IO_APIC_reg_03 reg_03;
1643 unsigned long flags; 1517 unsigned long flags;
1644 struct irq_cfg *cfg; 1518 struct irq_cfg *cfg;
1645 struct irq_desc *desc;
1646 unsigned int irq; 1519 unsigned int irq;
1647 1520
1648 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1521 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
@@ -1729,10 +1602,10 @@ __apicdebuginit(void) print_IO_APIC(void)
1729 } 1602 }
1730 } 1603 }
1731 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1604 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1732 for_each_irq_desc(irq, desc) { 1605 for_each_active_irq(irq) {
1733 struct irq_pin_list *entry; 1606 struct irq_pin_list *entry;
1734 1607
1735 cfg = desc->chip_data; 1608 cfg = get_irq_chip_data(irq);
1736 if (!cfg) 1609 if (!cfg)
1737 continue; 1610 continue;
1738 entry = cfg->irq_2_pin; 1611 entry = cfg->irq_2_pin;
@@ -2239,29 +2112,26 @@ static int __init timer_irq_works(void)
2239 * an edge even if it isn't on the 8259A... 2112 * an edge even if it isn't on the 8259A...
2240 */ 2113 */
2241 2114
2242static unsigned int startup_ioapic_irq(unsigned int irq) 2115static unsigned int startup_ioapic_irq(struct irq_data *data)
2243{ 2116{
2244 int was_pending = 0; 2117 int was_pending = 0, irq = data->irq;
2245 unsigned long flags; 2118 unsigned long flags;
2246 struct irq_cfg *cfg;
2247 2119
2248 raw_spin_lock_irqsave(&ioapic_lock, flags); 2120 raw_spin_lock_irqsave(&ioapic_lock, flags);
2249 if (irq < legacy_pic->nr_legacy_irqs) { 2121 if (irq < legacy_pic->nr_legacy_irqs) {
2250 legacy_pic->chip->mask(irq); 2122 legacy_pic->mask(irq);
2251 if (legacy_pic->irq_pending(irq)) 2123 if (legacy_pic->irq_pending(irq))
2252 was_pending = 1; 2124 was_pending = 1;
2253 } 2125 }
2254 cfg = irq_cfg(irq); 2126 __unmask_ioapic(data->chip_data);
2255 __unmask_IO_APIC_irq(cfg);
2256 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2127 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2257 2128
2258 return was_pending; 2129 return was_pending;
2259} 2130}
2260 2131
2261static int ioapic_retrigger_irq(unsigned int irq) 2132static int ioapic_retrigger_irq(struct irq_data *data)
2262{ 2133{
2263 2134 struct irq_cfg *cfg = data->chip_data;
2264 struct irq_cfg *cfg = irq_cfg(irq);
2265 unsigned long flags; 2135 unsigned long flags;
2266 2136
2267 raw_spin_lock_irqsave(&vector_lock, flags); 2137 raw_spin_lock_irqsave(&vector_lock, flags);
@@ -2312,7 +2182,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2312 * With interrupt-remapping, destination information comes 2182 * With interrupt-remapping, destination information comes
2313 * from interrupt-remapping table entry. 2183 * from interrupt-remapping table entry.
2314 */ 2184 */
2315 if (!irq_remapped(irq)) 2185 if (!irq_remapped(cfg))
2316 io_apic_write(apic, 0x11 + pin*2, dest); 2186 io_apic_write(apic, 0x11 + pin*2, dest);
2317 reg = io_apic_read(apic, 0x10 + pin*2); 2187 reg = io_apic_read(apic, 0x10 + pin*2);
2318 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2188 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
@@ -2322,65 +2192,46 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2322} 2192}
2323 2193
2324/* 2194/*
2325 * Either sets desc->affinity to a valid value, and returns 2195 * Either sets data->affinity to a valid value, and returns
2326 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and 2196 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
2327 * leaves desc->affinity untouched. 2197 * leaves data->affinity untouched.
2328 */ 2198 */
2329unsigned int 2199int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2330set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, 2200 unsigned int *dest_id)
2331 unsigned int *dest_id)
2332{ 2201{
2333 struct irq_cfg *cfg; 2202 struct irq_cfg *cfg = data->chip_data;
2334 unsigned int irq;
2335 2203
2336 if (!cpumask_intersects(mask, cpu_online_mask)) 2204 if (!cpumask_intersects(mask, cpu_online_mask))
2337 return -1; 2205 return -1;
2338 2206
2339 irq = desc->irq; 2207 if (assign_irq_vector(data->irq, data->chip_data, mask))
2340 cfg = desc->chip_data;
2341 if (assign_irq_vector(irq, cfg, mask))
2342 return -1; 2208 return -1;
2343 2209
2344 cpumask_copy(desc->affinity, mask); 2210 cpumask_copy(data->affinity, mask);
2345 2211
2346 *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); 2212 *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
2347 return 0; 2213 return 0;
2348} 2214}
2349 2215
2350static int 2216static int
2351set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2217ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2218 bool force)
2352{ 2219{
2353 struct irq_cfg *cfg; 2220 unsigned int dest, irq = data->irq;
2354 unsigned long flags; 2221 unsigned long flags;
2355 unsigned int dest; 2222 int ret;
2356 unsigned int irq;
2357 int ret = -1;
2358
2359 irq = desc->irq;
2360 cfg = desc->chip_data;
2361 2223
2362 raw_spin_lock_irqsave(&ioapic_lock, flags); 2224 raw_spin_lock_irqsave(&ioapic_lock, flags);
2363 ret = set_desc_affinity(desc, mask, &dest); 2225 ret = __ioapic_set_affinity(data, mask, &dest);
2364 if (!ret) { 2226 if (!ret) {
2365 /* Only the high 8 bits are valid. */ 2227 /* Only the high 8 bits are valid. */
2366 dest = SET_APIC_LOGICAL_ID(dest); 2228 dest = SET_APIC_LOGICAL_ID(dest);
2367 __target_IO_APIC_irq(irq, dest, cfg); 2229 __target_IO_APIC_irq(irq, dest, data->chip_data);
2368 } 2230 }
2369 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2231 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2370
2371 return ret; 2232 return ret;
2372} 2233}
2373 2234
2374static int
2375set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2376{
2377 struct irq_desc *desc;
2378
2379 desc = irq_to_desc(irq);
2380
2381 return set_ioapic_affinity_irq_desc(desc, mask);
2382}
2383
2384#ifdef CONFIG_INTR_REMAP 2235#ifdef CONFIG_INTR_REMAP
2385 2236
2386/* 2237/*
@@ -2395,24 +2246,21 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2395 * the interrupt-remapping table entry. 2246 * the interrupt-remapping table entry.
2396 */ 2247 */
2397static int 2248static int
2398migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2249ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2250 bool force)
2399{ 2251{
2400 struct irq_cfg *cfg; 2252 struct irq_cfg *cfg = data->chip_data;
2253 unsigned int dest, irq = data->irq;
2401 struct irte irte; 2254 struct irte irte;
2402 unsigned int dest;
2403 unsigned int irq;
2404 int ret = -1;
2405 2255
2406 if (!cpumask_intersects(mask, cpu_online_mask)) 2256 if (!cpumask_intersects(mask, cpu_online_mask))
2407 return ret; 2257 return -EINVAL;
2408 2258
2409 irq = desc->irq;
2410 if (get_irte(irq, &irte)) 2259 if (get_irte(irq, &irte))
2411 return ret; 2260 return -EBUSY;
2412 2261
2413 cfg = desc->chip_data;
2414 if (assign_irq_vector(irq, cfg, mask)) 2262 if (assign_irq_vector(irq, cfg, mask))
2415 return ret; 2263 return -EBUSY;
2416 2264
2417 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); 2265 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2418 2266
@@ -2427,29 +2275,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2427 if (cfg->move_in_progress) 2275 if (cfg->move_in_progress)
2428 send_cleanup_vector(cfg); 2276 send_cleanup_vector(cfg);
2429 2277
2430 cpumask_copy(desc->affinity, mask); 2278 cpumask_copy(data->affinity, mask);
2431
2432 return 0; 2279 return 0;
2433} 2280}
2434 2281
2435/*
2436 * Migrates the IRQ destination in the process context.
2437 */
2438static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2439 const struct cpumask *mask)
2440{
2441 return migrate_ioapic_irq_desc(desc, mask);
2442}
2443static int set_ir_ioapic_affinity_irq(unsigned int irq,
2444 const struct cpumask *mask)
2445{
2446 struct irq_desc *desc = irq_to_desc(irq);
2447
2448 return set_ir_ioapic_affinity_irq_desc(desc, mask);
2449}
2450#else 2282#else
2451static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2283static inline int
2452 const struct cpumask *mask) 2284ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2285 bool force)
2453{ 2286{
2454 return 0; 2287 return 0;
2455} 2288}
@@ -2511,10 +2344,8 @@ unlock:
2511 irq_exit(); 2344 irq_exit();
2512} 2345}
2513 2346
2514static void __irq_complete_move(struct irq_desc **descp, unsigned vector) 2347static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
2515{ 2348{
2516 struct irq_desc *desc = *descp;
2517 struct irq_cfg *cfg = desc->chip_data;
2518 unsigned me; 2349 unsigned me;
2519 2350
2520 if (likely(!cfg->move_in_progress)) 2351 if (likely(!cfg->move_in_progress))
@@ -2526,31 +2357,28 @@ static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
2526 send_cleanup_vector(cfg); 2357 send_cleanup_vector(cfg);
2527} 2358}
2528 2359
2529static void irq_complete_move(struct irq_desc **descp) 2360static void irq_complete_move(struct irq_cfg *cfg)
2530{ 2361{
2531 __irq_complete_move(descp, ~get_irq_regs()->orig_ax); 2362 __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
2532} 2363}
2533 2364
2534void irq_force_complete_move(int irq) 2365void irq_force_complete_move(int irq)
2535{ 2366{
2536 struct irq_desc *desc = irq_to_desc(irq); 2367 struct irq_cfg *cfg = get_irq_chip_data(irq);
2537 struct irq_cfg *cfg = desc->chip_data;
2538 2368
2539 if (!cfg) 2369 if (!cfg)
2540 return; 2370 return;
2541 2371
2542 __irq_complete_move(&desc, cfg->vector); 2372 __irq_complete_move(cfg, cfg->vector);
2543} 2373}
2544#else 2374#else
2545static inline void irq_complete_move(struct irq_desc **descp) {} 2375static inline void irq_complete_move(struct irq_cfg *cfg) { }
2546#endif 2376#endif
2547 2377
2548static void ack_apic_edge(unsigned int irq) 2378static void ack_apic_edge(struct irq_data *data)
2549{ 2379{
2550 struct irq_desc *desc = irq_to_desc(irq); 2380 irq_complete_move(data->chip_data);
2551 2381 move_native_irq(data->irq);
2552 irq_complete_move(&desc);
2553 move_native_irq(irq);
2554 ack_APIC_irq(); 2382 ack_APIC_irq();
2555} 2383}
2556 2384
@@ -2572,10 +2400,12 @@ atomic_t irq_mis_count;
2572 * Otherwise, we simulate the EOI message manually by changing the trigger 2400 * Otherwise, we simulate the EOI message manually by changing the trigger
2573 * mode to edge and then back to level, with RTE being masked during this. 2401 * mode to edge and then back to level, with RTE being masked during this.
2574*/ 2402*/
2575static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 2403static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2576{ 2404{
2577 struct irq_pin_list *entry; 2405 struct irq_pin_list *entry;
2406 unsigned long flags;
2578 2407
2408 raw_spin_lock_irqsave(&ioapic_lock, flags);
2579 for_each_irq_pin(entry, cfg->irq_2_pin) { 2409 for_each_irq_pin(entry, cfg->irq_2_pin) {
2580 if (mp_ioapics[entry->apic].apicver >= 0x20) { 2410 if (mp_ioapics[entry->apic].apicver >= 0x20) {
2581 /* 2411 /*
@@ -2584,7 +2414,7 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2584 * intr-remapping table entry. Hence for the io-apic 2414 * intr-remapping table entry. Hence for the io-apic
2585 * EOI we use the pin number. 2415 * EOI we use the pin number.
2586 */ 2416 */
2587 if (irq_remapped(irq)) 2417 if (irq_remapped(cfg))
2588 io_apic_eoi(entry->apic, entry->pin); 2418 io_apic_eoi(entry->apic, entry->pin);
2589 else 2419 else
2590 io_apic_eoi(entry->apic, cfg->vector); 2420 io_apic_eoi(entry->apic, cfg->vector);
@@ -2593,36 +2423,22 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2593 __unmask_and_level_IO_APIC_irq(entry); 2423 __unmask_and_level_IO_APIC_irq(entry);
2594 } 2424 }
2595 } 2425 }
2596}
2597
2598static void eoi_ioapic_irq(struct irq_desc *desc)
2599{
2600 struct irq_cfg *cfg;
2601 unsigned long flags;
2602 unsigned int irq;
2603
2604 irq = desc->irq;
2605 cfg = desc->chip_data;
2606
2607 raw_spin_lock_irqsave(&ioapic_lock, flags);
2608 __eoi_ioapic_irq(irq, cfg);
2609 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2426 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2610} 2427}
2611 2428
2612static void ack_apic_level(unsigned int irq) 2429static void ack_apic_level(struct irq_data *data)
2613{ 2430{
2431 struct irq_cfg *cfg = data->chip_data;
2432 int i, do_unmask_irq = 0, irq = data->irq;
2614 struct irq_desc *desc = irq_to_desc(irq); 2433 struct irq_desc *desc = irq_to_desc(irq);
2615 unsigned long v; 2434 unsigned long v;
2616 int i;
2617 struct irq_cfg *cfg;
2618 int do_unmask_irq = 0;
2619 2435
2620 irq_complete_move(&desc); 2436 irq_complete_move(cfg);
2621#ifdef CONFIG_GENERIC_PENDING_IRQ 2437#ifdef CONFIG_GENERIC_PENDING_IRQ
2622 /* If we are moving the irq we need to mask it */ 2438 /* If we are moving the irq we need to mask it */
2623 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2439 if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2624 do_unmask_irq = 1; 2440 do_unmask_irq = 1;
2625 mask_IO_APIC_irq_desc(desc); 2441 mask_ioapic(cfg);
2626 } 2442 }
2627#endif 2443#endif
2628 2444
@@ -2658,7 +2474,6 @@ static void ack_apic_level(unsigned int irq)
2658 * we use the above logic (mask+edge followed by unmask+level) from 2474 * we use the above logic (mask+edge followed by unmask+level) from
2659 * Manfred Spraul to clear the remote IRR. 2475 * Manfred Spraul to clear the remote IRR.
2660 */ 2476 */
2661 cfg = desc->chip_data;
2662 i = cfg->vector; 2477 i = cfg->vector;
2663 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2478 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2664 2479
@@ -2678,7 +2493,7 @@ static void ack_apic_level(unsigned int irq)
2678 if (!(v & (1 << (i & 0x1f)))) { 2493 if (!(v & (1 << (i & 0x1f)))) {
2679 atomic_inc(&irq_mis_count); 2494 atomic_inc(&irq_mis_count);
2680 2495
2681 eoi_ioapic_irq(desc); 2496 eoi_ioapic_irq(irq, cfg);
2682 } 2497 }
2683 2498
2684 /* Now we can move and renable the irq */ 2499 /* Now we can move and renable the irq */
@@ -2709,61 +2524,57 @@ static void ack_apic_level(unsigned int irq)
2709 * accurate and is causing problems then it is a hardware bug 2524 * accurate and is causing problems then it is a hardware bug
2710 * and you can go talk to the chipset vendor about it. 2525 * and you can go talk to the chipset vendor about it.
2711 */ 2526 */
2712 cfg = desc->chip_data;
2713 if (!io_apic_level_ack_pending(cfg)) 2527 if (!io_apic_level_ack_pending(cfg))
2714 move_masked_irq(irq); 2528 move_masked_irq(irq);
2715 unmask_IO_APIC_irq_desc(desc); 2529 unmask_ioapic(cfg);
2716 } 2530 }
2717} 2531}
2718 2532
2719#ifdef CONFIG_INTR_REMAP 2533#ifdef CONFIG_INTR_REMAP
2720static void ir_ack_apic_edge(unsigned int irq) 2534static void ir_ack_apic_edge(struct irq_data *data)
2721{ 2535{
2722 ack_APIC_irq(); 2536 ack_APIC_irq();
2723} 2537}
2724 2538
2725static void ir_ack_apic_level(unsigned int irq) 2539static void ir_ack_apic_level(struct irq_data *data)
2726{ 2540{
2727 struct irq_desc *desc = irq_to_desc(irq);
2728
2729 ack_APIC_irq(); 2541 ack_APIC_irq();
2730 eoi_ioapic_irq(desc); 2542 eoi_ioapic_irq(data->irq, data->chip_data);
2731} 2543}
2732#endif /* CONFIG_INTR_REMAP */ 2544#endif /* CONFIG_INTR_REMAP */
2733 2545
2734static struct irq_chip ioapic_chip __read_mostly = { 2546static struct irq_chip ioapic_chip __read_mostly = {
2735 .name = "IO-APIC", 2547 .name = "IO-APIC",
2736 .startup = startup_ioapic_irq, 2548 .irq_startup = startup_ioapic_irq,
2737 .mask = mask_IO_APIC_irq, 2549 .irq_mask = mask_ioapic_irq,
2738 .unmask = unmask_IO_APIC_irq, 2550 .irq_unmask = unmask_ioapic_irq,
2739 .ack = ack_apic_edge, 2551 .irq_ack = ack_apic_edge,
2740 .eoi = ack_apic_level, 2552 .irq_eoi = ack_apic_level,
2741#ifdef CONFIG_SMP 2553#ifdef CONFIG_SMP
2742 .set_affinity = set_ioapic_affinity_irq, 2554 .irq_set_affinity = ioapic_set_affinity,
2743#endif 2555#endif
2744 .retrigger = ioapic_retrigger_irq, 2556 .irq_retrigger = ioapic_retrigger_irq,
2745}; 2557};
2746 2558
2747static struct irq_chip ir_ioapic_chip __read_mostly = { 2559static struct irq_chip ir_ioapic_chip __read_mostly = {
2748 .name = "IR-IO-APIC", 2560 .name = "IR-IO-APIC",
2749 .startup = startup_ioapic_irq, 2561 .irq_startup = startup_ioapic_irq,
2750 .mask = mask_IO_APIC_irq, 2562 .irq_mask = mask_ioapic_irq,
2751 .unmask = unmask_IO_APIC_irq, 2563 .irq_unmask = unmask_ioapic_irq,
2752#ifdef CONFIG_INTR_REMAP 2564#ifdef CONFIG_INTR_REMAP
2753 .ack = ir_ack_apic_edge, 2565 .irq_ack = ir_ack_apic_edge,
2754 .eoi = ir_ack_apic_level, 2566 .irq_eoi = ir_ack_apic_level,
2755#ifdef CONFIG_SMP 2567#ifdef CONFIG_SMP
2756 .set_affinity = set_ir_ioapic_affinity_irq, 2568 .irq_set_affinity = ir_ioapic_set_affinity,
2757#endif 2569#endif
2758#endif 2570#endif
2759 .retrigger = ioapic_retrigger_irq, 2571 .irq_retrigger = ioapic_retrigger_irq,
2760}; 2572};
2761 2573
2762static inline void init_IO_APIC_traps(void) 2574static inline void init_IO_APIC_traps(void)
2763{ 2575{
2764 int irq;
2765 struct irq_desc *desc;
2766 struct irq_cfg *cfg; 2576 struct irq_cfg *cfg;
2577 unsigned int irq;
2767 2578
2768 /* 2579 /*
2769 * NOTE! The local APIC isn't very good at handling 2580 * NOTE! The local APIC isn't very good at handling
@@ -2776,8 +2587,8 @@ static inline void init_IO_APIC_traps(void)
2776 * Also, we've got to be careful not to trash gate 2587 * Also, we've got to be careful not to trash gate
2777 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2588 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2778 */ 2589 */
2779 for_each_irq_desc(irq, desc) { 2590 for_each_active_irq(irq) {
2780 cfg = desc->chip_data; 2591 cfg = get_irq_chip_data(irq);
2781 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2592 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2782 /* 2593 /*
2783 * Hmm.. We don't have an entry for this, 2594 * Hmm.. We don't have an entry for this,
@@ -2788,7 +2599,7 @@ static inline void init_IO_APIC_traps(void)
2788 legacy_pic->make_irq(irq); 2599 legacy_pic->make_irq(irq);
2789 else 2600 else
2790 /* Strange. Oh, well.. */ 2601 /* Strange. Oh, well.. */
2791 desc->chip = &no_irq_chip; 2602 set_irq_chip(irq, &no_irq_chip);
2792 } 2603 }
2793 } 2604 }
2794} 2605}
@@ -2797,7 +2608,7 @@ static inline void init_IO_APIC_traps(void)
2797 * The local APIC irq-chip implementation: 2608 * The local APIC irq-chip implementation:
2798 */ 2609 */
2799 2610
2800static void mask_lapic_irq(unsigned int irq) 2611static void mask_lapic_irq(struct irq_data *data)
2801{ 2612{
2802 unsigned long v; 2613 unsigned long v;
2803 2614
@@ -2805,7 +2616,7 @@ static void mask_lapic_irq(unsigned int irq)
2805 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 2616 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2806} 2617}
2807 2618
2808static void unmask_lapic_irq(unsigned int irq) 2619static void unmask_lapic_irq(struct irq_data *data)
2809{ 2620{
2810 unsigned long v; 2621 unsigned long v;
2811 2622
@@ -2813,21 +2624,21 @@ static void unmask_lapic_irq(unsigned int irq)
2813 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2624 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2814} 2625}
2815 2626
2816static void ack_lapic_irq(unsigned int irq) 2627static void ack_lapic_irq(struct irq_data *data)
2817{ 2628{
2818 ack_APIC_irq(); 2629 ack_APIC_irq();
2819} 2630}
2820 2631
2821static struct irq_chip lapic_chip __read_mostly = { 2632static struct irq_chip lapic_chip __read_mostly = {
2822 .name = "local-APIC", 2633 .name = "local-APIC",
2823 .mask = mask_lapic_irq, 2634 .irq_mask = mask_lapic_irq,
2824 .unmask = unmask_lapic_irq, 2635 .irq_unmask = unmask_lapic_irq,
2825 .ack = ack_lapic_irq, 2636 .irq_ack = ack_lapic_irq,
2826}; 2637};
2827 2638
2828static void lapic_register_intr(int irq, struct irq_desc *desc) 2639static void lapic_register_intr(int irq)
2829{ 2640{
2830 desc->status &= ~IRQ_LEVEL; 2641 irq_clear_status_flags(irq, IRQ_LEVEL);
2831 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2642 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2832 "edge"); 2643 "edge");
2833} 2644}
@@ -2930,9 +2741,8 @@ int timer_through_8259 __initdata;
2930 */ 2741 */
2931static inline void __init check_timer(void) 2742static inline void __init check_timer(void)
2932{ 2743{
2933 struct irq_desc *desc = irq_to_desc(0); 2744 struct irq_cfg *cfg = get_irq_chip_data(0);
2934 struct irq_cfg *cfg = desc->chip_data; 2745 int node = cpu_to_node(0);
2935 int node = cpu_to_node(boot_cpu_id);
2936 int apic1, pin1, apic2, pin2; 2746 int apic1, pin1, apic2, pin2;
2937 unsigned long flags; 2747 unsigned long flags;
2938 int no_pin1 = 0; 2748 int no_pin1 = 0;
@@ -2942,7 +2752,7 @@ static inline void __init check_timer(void)
2942 /* 2752 /*
2943 * get/set the timer IRQ vector: 2753 * get/set the timer IRQ vector:
2944 */ 2754 */
2945 legacy_pic->chip->mask(0); 2755 legacy_pic->mask(0);
2946 assign_irq_vector(0, cfg, apic->target_cpus()); 2756 assign_irq_vector(0, cfg, apic->target_cpus());
2947 2757
2948 /* 2758 /*
@@ -3001,7 +2811,7 @@ static inline void __init check_timer(void)
3001 add_pin_to_irq_node(cfg, node, apic1, pin1); 2811 add_pin_to_irq_node(cfg, node, apic1, pin1);
3002 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2812 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
3003 } else { 2813 } else {
3004 /* for edge trigger, setup_IO_APIC_irq already 2814 /* for edge trigger, setup_ioapic_irq already
3005 * leave it unmasked. 2815 * leave it unmasked.
3006 * so only need to unmask if it is level-trigger 2816 * so only need to unmask if it is level-trigger
3007 * do we really have level trigger timer? 2817 * do we really have level trigger timer?
@@ -3009,12 +2819,12 @@ static inline void __init check_timer(void)
3009 int idx; 2819 int idx;
3010 idx = find_irq_entry(apic1, pin1, mp_INT); 2820 idx = find_irq_entry(apic1, pin1, mp_INT);
3011 if (idx != -1 && irq_trigger(idx)) 2821 if (idx != -1 && irq_trigger(idx))
3012 unmask_IO_APIC_irq_desc(desc); 2822 unmask_ioapic(cfg);
3013 } 2823 }
3014 if (timer_irq_works()) { 2824 if (timer_irq_works()) {
3015 if (nmi_watchdog == NMI_IO_APIC) { 2825 if (nmi_watchdog == NMI_IO_APIC) {
3016 setup_nmi(); 2826 setup_nmi();
3017 legacy_pic->chip->unmask(0); 2827 legacy_pic->unmask(0);
3018 } 2828 }
3019 if (disable_timer_pin_1 > 0) 2829 if (disable_timer_pin_1 > 0)
3020 clear_IO_APIC_pin(0, pin1); 2830 clear_IO_APIC_pin(0, pin1);
@@ -3037,14 +2847,14 @@ static inline void __init check_timer(void)
3037 */ 2847 */
3038 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 2848 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
3039 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2849 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
3040 legacy_pic->chip->unmask(0); 2850 legacy_pic->unmask(0);
3041 if (timer_irq_works()) { 2851 if (timer_irq_works()) {
3042 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2852 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
3043 timer_through_8259 = 1; 2853 timer_through_8259 = 1;
3044 if (nmi_watchdog == NMI_IO_APIC) { 2854 if (nmi_watchdog == NMI_IO_APIC) {
3045 legacy_pic->chip->mask(0); 2855 legacy_pic->mask(0);
3046 setup_nmi(); 2856 setup_nmi();
3047 legacy_pic->chip->unmask(0); 2857 legacy_pic->unmask(0);
3048 } 2858 }
3049 goto out; 2859 goto out;
3050 } 2860 }
@@ -3052,7 +2862,7 @@ static inline void __init check_timer(void)
3052 * Cleanup, just in case ... 2862 * Cleanup, just in case ...
3053 */ 2863 */
3054 local_irq_disable(); 2864 local_irq_disable();
3055 legacy_pic->chip->mask(0); 2865 legacy_pic->mask(0);
3056 clear_IO_APIC_pin(apic2, pin2); 2866 clear_IO_APIC_pin(apic2, pin2);
3057 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2867 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3058 } 2868 }
@@ -3069,16 +2879,16 @@ static inline void __init check_timer(void)
3069 apic_printk(APIC_QUIET, KERN_INFO 2879 apic_printk(APIC_QUIET, KERN_INFO
3070 "...trying to set up timer as Virtual Wire IRQ...\n"); 2880 "...trying to set up timer as Virtual Wire IRQ...\n");
3071 2881
3072 lapic_register_intr(0, desc); 2882 lapic_register_intr(0);
3073 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 2883 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3074 legacy_pic->chip->unmask(0); 2884 legacy_pic->unmask(0);
3075 2885
3076 if (timer_irq_works()) { 2886 if (timer_irq_works()) {
3077 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 2887 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3078 goto out; 2888 goto out;
3079 } 2889 }
3080 local_irq_disable(); 2890 local_irq_disable();
3081 legacy_pic->chip->mask(0); 2891 legacy_pic->mask(0);
3082 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 2892 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3083 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 2893 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3084 2894
@@ -3244,49 +3054,42 @@ device_initcall(ioapic_init_sysfs);
3244/* 3054/*
3245 * Dynamic irq allocate and deallocation 3055 * Dynamic irq allocate and deallocation
3246 */ 3056 */
3247unsigned int create_irq_nr(unsigned int irq_want, int node) 3057unsigned int create_irq_nr(unsigned int from, int node)
3248{ 3058{
3249 /* Allocate an unused irq */ 3059 struct irq_cfg *cfg;
3250 unsigned int irq;
3251 unsigned int new;
3252 unsigned long flags; 3060 unsigned long flags;
3253 struct irq_cfg *cfg_new = NULL; 3061 unsigned int ret = 0;
3254 struct irq_desc *desc_new = NULL; 3062 int irq;
3255
3256 irq = 0;
3257 if (irq_want < nr_irqs_gsi)
3258 irq_want = nr_irqs_gsi;
3259
3260 raw_spin_lock_irqsave(&vector_lock, flags);
3261 for (new = irq_want; new < nr_irqs; new++) {
3262 desc_new = irq_to_desc_alloc_node(new, node);
3263 if (!desc_new) {
3264 printk(KERN_INFO "can not get irq_desc for %d\n", new);
3265 continue;
3266 }
3267 cfg_new = desc_new->chip_data;
3268
3269 if (cfg_new->vector != 0)
3270 continue;
3271 3063
3272 desc_new = move_irq_desc(desc_new, node); 3064 if (from < nr_irqs_gsi)
3273 cfg_new = desc_new->chip_data; 3065 from = nr_irqs_gsi;
3274 3066
3275 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) 3067 irq = alloc_irq_from(from, node);
3276 irq = new; 3068 if (irq < 0)
3277 break; 3069 return 0;
3070 cfg = alloc_irq_cfg(irq, node);
3071 if (!cfg) {
3072 free_irq_at(irq, NULL);
3073 return 0;
3278 } 3074 }
3279 raw_spin_unlock_irqrestore(&vector_lock, flags);
3280 3075
3281 if (irq > 0) 3076 raw_spin_lock_irqsave(&vector_lock, flags);
3282 dynamic_irq_init_keep_chip_data(irq); 3077 if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
3078 ret = irq;
3079 raw_spin_unlock_irqrestore(&vector_lock, flags);
3283 3080
3284 return irq; 3081 if (ret) {
3082 set_irq_chip_data(irq, cfg);
3083 irq_clear_status_flags(irq, IRQ_NOREQUEST);
3084 } else {
3085 free_irq_at(irq, cfg);
3086 }
3087 return ret;
3285} 3088}
3286 3089
3287int create_irq(void) 3090int create_irq(void)
3288{ 3091{
3289 int node = cpu_to_node(boot_cpu_id); 3092 int node = cpu_to_node(0);
3290 unsigned int irq_want; 3093 unsigned int irq_want;
3291 int irq; 3094 int irq;
3292 3095
@@ -3301,14 +3104,17 @@ int create_irq(void)
3301 3104
3302void destroy_irq(unsigned int irq) 3105void destroy_irq(unsigned int irq)
3303{ 3106{
3107 struct irq_cfg *cfg = get_irq_chip_data(irq);
3304 unsigned long flags; 3108 unsigned long flags;
3305 3109
3306 dynamic_irq_cleanup_keep_chip_data(irq); 3110 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3307 3111
3308 free_irte(irq); 3112 if (intr_remapping_enabled)
3113 free_irte(irq);
3309 raw_spin_lock_irqsave(&vector_lock, flags); 3114 raw_spin_lock_irqsave(&vector_lock, flags);
3310 __clear_irq_vector(irq, get_irq_chip_data(irq)); 3115 __clear_irq_vector(irq, cfg);
3311 raw_spin_unlock_irqrestore(&vector_lock, flags); 3116 raw_spin_unlock_irqrestore(&vector_lock, flags);
3117 free_irq_at(irq, cfg);
3312} 3118}
3313 3119
3314/* 3120/*
@@ -3332,7 +3138,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3332 3138
3333 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3139 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3334 3140
3335 if (irq_remapped(irq)) { 3141 if (irq_remapped(get_irq_chip_data(irq))) {
3336 struct irte irte; 3142 struct irte irte;
3337 int ir_index; 3143 int ir_index;
3338 u16 sub_handle; 3144 u16 sub_handle;
@@ -3340,14 +3146,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3340 ir_index = map_irq_to_irte_handle(irq, &sub_handle); 3146 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
3341 BUG_ON(ir_index == -1); 3147 BUG_ON(ir_index == -1);
3342 3148
3343 memset (&irte, 0, sizeof(irte)); 3149 prepare_irte(&irte, cfg->vector, dest);
3344
3345 irte.present = 1;
3346 irte.dst_mode = apic->irq_dest_mode;
3347 irte.trigger_mode = 0; /* edge */
3348 irte.dlvry_mode = apic->irq_delivery_mode;
3349 irte.vector = cfg->vector;
3350 irte.dest_id = IRTE_DEST(dest);
3351 3150
3352 /* Set source-id of interrupt request */ 3151 /* Set source-id of interrupt request */
3353 if (pdev) 3152 if (pdev)
@@ -3392,26 +3191,24 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3392} 3191}
3393 3192
3394#ifdef CONFIG_SMP 3193#ifdef CONFIG_SMP
3395static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3194static int
3195msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3396{ 3196{
3397 struct irq_desc *desc = irq_to_desc(irq); 3197 struct irq_cfg *cfg = data->chip_data;
3398 struct irq_cfg *cfg;
3399 struct msi_msg msg; 3198 struct msi_msg msg;
3400 unsigned int dest; 3199 unsigned int dest;
3401 3200
3402 if (set_desc_affinity(desc, mask, &dest)) 3201 if (__ioapic_set_affinity(data, mask, &dest))
3403 return -1; 3202 return -1;
3404 3203
3405 cfg = desc->chip_data; 3204 __get_cached_msi_msg(data->msi_desc, &msg);
3406
3407 get_cached_msi_msg_desc(desc, &msg);
3408 3205
3409 msg.data &= ~MSI_DATA_VECTOR_MASK; 3206 msg.data &= ~MSI_DATA_VECTOR_MASK;
3410 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3207 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3411 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3208 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3412 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3209 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3413 3210
3414 write_msi_msg_desc(desc, &msg); 3211 __write_msi_msg(data->msi_desc, &msg);
3415 3212
3416 return 0; 3213 return 0;
3417} 3214}
@@ -3421,17 +3218,17 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3421 * done in the process context using interrupt-remapping hardware. 3218 * done in the process context using interrupt-remapping hardware.
3422 */ 3219 */
3423static int 3220static int
3424ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3221ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3222 bool force)
3425{ 3223{
3426 struct irq_desc *desc = irq_to_desc(irq); 3224 struct irq_cfg *cfg = data->chip_data;
3427 struct irq_cfg *cfg = desc->chip_data; 3225 unsigned int dest, irq = data->irq;
3428 unsigned int dest;
3429 struct irte irte; 3226 struct irte irte;
3430 3227
3431 if (get_irte(irq, &irte)) 3228 if (get_irte(irq, &irte))
3432 return -1; 3229 return -1;
3433 3230
3434 if (set_desc_affinity(desc, mask, &dest)) 3231 if (__ioapic_set_affinity(data, mask, &dest))
3435 return -1; 3232 return -1;
3436 3233
3437 irte.vector = cfg->vector; 3234 irte.vector = cfg->vector;
@@ -3461,27 +3258,27 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3461 * which implement the MSI or MSI-X Capability Structure. 3258 * which implement the MSI or MSI-X Capability Structure.
3462 */ 3259 */
3463static struct irq_chip msi_chip = { 3260static struct irq_chip msi_chip = {
3464 .name = "PCI-MSI", 3261 .name = "PCI-MSI",
3465 .unmask = unmask_msi_irq, 3262 .irq_unmask = unmask_msi_irq,
3466 .mask = mask_msi_irq, 3263 .irq_mask = mask_msi_irq,
3467 .ack = ack_apic_edge, 3264 .irq_ack = ack_apic_edge,
3468#ifdef CONFIG_SMP 3265#ifdef CONFIG_SMP
3469 .set_affinity = set_msi_irq_affinity, 3266 .irq_set_affinity = msi_set_affinity,
3470#endif 3267#endif
3471 .retrigger = ioapic_retrigger_irq, 3268 .irq_retrigger = ioapic_retrigger_irq,
3472}; 3269};
3473 3270
3474static struct irq_chip msi_ir_chip = { 3271static struct irq_chip msi_ir_chip = {
3475 .name = "IR-PCI-MSI", 3272 .name = "IR-PCI-MSI",
3476 .unmask = unmask_msi_irq, 3273 .irq_unmask = unmask_msi_irq,
3477 .mask = mask_msi_irq, 3274 .irq_mask = mask_msi_irq,
3478#ifdef CONFIG_INTR_REMAP 3275#ifdef CONFIG_INTR_REMAP
3479 .ack = ir_ack_apic_edge, 3276 .irq_ack = ir_ack_apic_edge,
3480#ifdef CONFIG_SMP 3277#ifdef CONFIG_SMP
3481 .set_affinity = ir_set_msi_irq_affinity, 3278 .irq_set_affinity = ir_msi_set_affinity,
3482#endif 3279#endif
3483#endif 3280#endif
3484 .retrigger = ioapic_retrigger_irq, 3281 .irq_retrigger = ioapic_retrigger_irq,
3485}; 3282};
3486 3283
3487/* 3284/*
@@ -3513,8 +3310,8 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3513 3310
3514static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3311static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3515{ 3312{
3516 int ret;
3517 struct msi_msg msg; 3313 struct msi_msg msg;
3314 int ret;
3518 3315
3519 ret = msi_compose_msg(dev, irq, &msg, -1); 3316 ret = msi_compose_msg(dev, irq, &msg, -1);
3520 if (ret < 0) 3317 if (ret < 0)
@@ -3523,12 +3320,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3523 set_irq_msi(irq, msidesc); 3320 set_irq_msi(irq, msidesc);
3524 write_msi_msg(irq, &msg); 3321 write_msi_msg(irq, &msg);
3525 3322
3526 if (irq_remapped(irq)) { 3323 if (irq_remapped(get_irq_chip_data(irq))) {
3527 struct irq_desc *desc = irq_to_desc(irq); 3324 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3528 /*
3529 * irq migration in process context
3530 */
3531 desc->status |= IRQ_MOVE_PCNTXT;
3532 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); 3325 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
3533 } else 3326 } else
3534 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3327 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
@@ -3540,13 +3333,10 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3540 3333
3541int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3334int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3542{ 3335{
3543 unsigned int irq; 3336 int node, ret, sub_handle, index = 0;
3544 int ret, sub_handle; 3337 unsigned int irq, irq_want;
3545 struct msi_desc *msidesc; 3338 struct msi_desc *msidesc;
3546 unsigned int irq_want;
3547 struct intel_iommu *iommu = NULL; 3339 struct intel_iommu *iommu = NULL;
3548 int index = 0;
3549 int node;
3550 3340
3551 /* x86 doesn't support multiple MSI yet */ 3341 /* x86 doesn't support multiple MSI yet */
3552 if (type == PCI_CAP_ID_MSI && nvec > 1) 3342 if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3606,18 +3396,17 @@ void arch_teardown_msi_irq(unsigned int irq)
3606 3396
3607#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) 3397#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3608#ifdef CONFIG_SMP 3398#ifdef CONFIG_SMP
3609static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3399static int
3400dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3401 bool force)
3610{ 3402{
3611 struct irq_desc *desc = irq_to_desc(irq); 3403 struct irq_cfg *cfg = data->chip_data;
3612 struct irq_cfg *cfg; 3404 unsigned int dest, irq = data->irq;
3613 struct msi_msg msg; 3405 struct msi_msg msg;
3614 unsigned int dest;
3615 3406
3616 if (set_desc_affinity(desc, mask, &dest)) 3407 if (__ioapic_set_affinity(data, mask, &dest))
3617 return -1; 3408 return -1;
3618 3409
3619 cfg = desc->chip_data;
3620
3621 dmar_msi_read(irq, &msg); 3410 dmar_msi_read(irq, &msg);
3622 3411
3623 msg.data &= ~MSI_DATA_VECTOR_MASK; 3412 msg.data &= ~MSI_DATA_VECTOR_MASK;
@@ -3633,14 +3422,14 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3633#endif /* CONFIG_SMP */ 3422#endif /* CONFIG_SMP */
3634 3423
3635static struct irq_chip dmar_msi_type = { 3424static struct irq_chip dmar_msi_type = {
3636 .name = "DMAR_MSI", 3425 .name = "DMAR_MSI",
3637 .unmask = dmar_msi_unmask, 3426 .irq_unmask = dmar_msi_unmask,
3638 .mask = dmar_msi_mask, 3427 .irq_mask = dmar_msi_mask,
3639 .ack = ack_apic_edge, 3428 .irq_ack = ack_apic_edge,
3640#ifdef CONFIG_SMP 3429#ifdef CONFIG_SMP
3641 .set_affinity = dmar_msi_set_affinity, 3430 .irq_set_affinity = dmar_msi_set_affinity,
3642#endif 3431#endif
3643 .retrigger = ioapic_retrigger_irq, 3432 .irq_retrigger = ioapic_retrigger_irq,
3644}; 3433};
3645 3434
3646int arch_setup_dmar_msi(unsigned int irq) 3435int arch_setup_dmar_msi(unsigned int irq)
@@ -3661,26 +3450,24 @@ int arch_setup_dmar_msi(unsigned int irq)
3661#ifdef CONFIG_HPET_TIMER 3450#ifdef CONFIG_HPET_TIMER
3662 3451
3663#ifdef CONFIG_SMP 3452#ifdef CONFIG_SMP
3664static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3453static int hpet_msi_set_affinity(struct irq_data *data,
3454 const struct cpumask *mask, bool force)
3665{ 3455{
3666 struct irq_desc *desc = irq_to_desc(irq); 3456 struct irq_cfg *cfg = data->chip_data;
3667 struct irq_cfg *cfg;
3668 struct msi_msg msg; 3457 struct msi_msg msg;
3669 unsigned int dest; 3458 unsigned int dest;
3670 3459
3671 if (set_desc_affinity(desc, mask, &dest)) 3460 if (__ioapic_set_affinity(data, mask, &dest))
3672 return -1; 3461 return -1;
3673 3462
3674 cfg = desc->chip_data; 3463 hpet_msi_read(data->handler_data, &msg);
3675
3676 hpet_msi_read(irq, &msg);
3677 3464
3678 msg.data &= ~MSI_DATA_VECTOR_MASK; 3465 msg.data &= ~MSI_DATA_VECTOR_MASK;
3679 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3466 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3680 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3467 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3681 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3468 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3682 3469
3683 hpet_msi_write(irq, &msg); 3470 hpet_msi_write(data->handler_data, &msg);
3684 3471
3685 return 0; 3472 return 0;
3686} 3473}
@@ -3688,34 +3475,33 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3688#endif /* CONFIG_SMP */ 3475#endif /* CONFIG_SMP */
3689 3476
3690static struct irq_chip ir_hpet_msi_type = { 3477static struct irq_chip ir_hpet_msi_type = {
3691 .name = "IR-HPET_MSI", 3478 .name = "IR-HPET_MSI",
3692 .unmask = hpet_msi_unmask, 3479 .irq_unmask = hpet_msi_unmask,
3693 .mask = hpet_msi_mask, 3480 .irq_mask = hpet_msi_mask,
3694#ifdef CONFIG_INTR_REMAP 3481#ifdef CONFIG_INTR_REMAP
3695 .ack = ir_ack_apic_edge, 3482 .irq_ack = ir_ack_apic_edge,
3696#ifdef CONFIG_SMP 3483#ifdef CONFIG_SMP
3697 .set_affinity = ir_set_msi_irq_affinity, 3484 .irq_set_affinity = ir_msi_set_affinity,
3698#endif 3485#endif
3699#endif 3486#endif
3700 .retrigger = ioapic_retrigger_irq, 3487 .irq_retrigger = ioapic_retrigger_irq,
3701}; 3488};
3702 3489
3703static struct irq_chip hpet_msi_type = { 3490static struct irq_chip hpet_msi_type = {
3704 .name = "HPET_MSI", 3491 .name = "HPET_MSI",
3705 .unmask = hpet_msi_unmask, 3492 .irq_unmask = hpet_msi_unmask,
3706 .mask = hpet_msi_mask, 3493 .irq_mask = hpet_msi_mask,
3707 .ack = ack_apic_edge, 3494 .irq_ack = ack_apic_edge,
3708#ifdef CONFIG_SMP 3495#ifdef CONFIG_SMP
3709 .set_affinity = hpet_msi_set_affinity, 3496 .irq_set_affinity = hpet_msi_set_affinity,
3710#endif 3497#endif
3711 .retrigger = ioapic_retrigger_irq, 3498 .irq_retrigger = ioapic_retrigger_irq,
3712}; 3499};
3713 3500
3714int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3501int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3715{ 3502{
3716 int ret;
3717 struct msi_msg msg; 3503 struct msi_msg msg;
3718 struct irq_desc *desc = irq_to_desc(irq); 3504 int ret;
3719 3505
3720 if (intr_remapping_enabled) { 3506 if (intr_remapping_enabled) {
3721 struct intel_iommu *iommu = map_hpet_to_ir(id); 3507 struct intel_iommu *iommu = map_hpet_to_ir(id);
@@ -3733,9 +3519,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3733 if (ret < 0) 3519 if (ret < 0)
3734 return ret; 3520 return ret;
3735 3521
3736 hpet_msi_write(irq, &msg); 3522 hpet_msi_write(get_irq_data(irq), &msg);
3737 desc->status |= IRQ_MOVE_PCNTXT; 3523 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3738 if (irq_remapped(irq)) 3524 if (irq_remapped(get_irq_chip_data(irq)))
3739 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, 3525 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
3740 handle_edge_irq, "edge"); 3526 handle_edge_irq, "edge");
3741 else 3527 else
@@ -3768,33 +3554,30 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3768 write_ht_irq_msg(irq, &msg); 3554 write_ht_irq_msg(irq, &msg);
3769} 3555}
3770 3556
3771static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) 3557static int
3558ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3772{ 3559{
3773 struct irq_desc *desc = irq_to_desc(irq); 3560 struct irq_cfg *cfg = data->chip_data;
3774 struct irq_cfg *cfg;
3775 unsigned int dest; 3561 unsigned int dest;
3776 3562
3777 if (set_desc_affinity(desc, mask, &dest)) 3563 if (__ioapic_set_affinity(data, mask, &dest))
3778 return -1; 3564 return -1;
3779 3565
3780 cfg = desc->chip_data; 3566 target_ht_irq(data->irq, dest, cfg->vector);
3781
3782 target_ht_irq(irq, dest, cfg->vector);
3783
3784 return 0; 3567 return 0;
3785} 3568}
3786 3569
3787#endif 3570#endif
3788 3571
3789static struct irq_chip ht_irq_chip = { 3572static struct irq_chip ht_irq_chip = {
3790 .name = "PCI-HT", 3573 .name = "PCI-HT",
3791 .mask = mask_ht_irq, 3574 .irq_mask = mask_ht_irq,
3792 .unmask = unmask_ht_irq, 3575 .irq_unmask = unmask_ht_irq,
3793 .ack = ack_apic_edge, 3576 .irq_ack = ack_apic_edge,
3794#ifdef CONFIG_SMP 3577#ifdef CONFIG_SMP
3795 .set_affinity = set_ht_irq_affinity, 3578 .irq_set_affinity = ht_set_affinity,
3796#endif 3579#endif
3797 .retrigger = ioapic_retrigger_irq, 3580 .irq_retrigger = ioapic_retrigger_irq,
3798}; 3581};
3799 3582
3800int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 3583int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3885,14 +3668,13 @@ int __init arch_probe_nr_irqs(void)
3885 if (nr < nr_irqs) 3668 if (nr < nr_irqs)
3886 nr_irqs = nr; 3669 nr_irqs = nr;
3887 3670
3888 return 0; 3671 return NR_IRQS_LEGACY;
3889} 3672}
3890#endif 3673#endif
3891 3674
3892static int __io_apic_set_pci_routing(struct device *dev, int irq, 3675static int __io_apic_set_pci_routing(struct device *dev, int irq,
3893 struct io_apic_irq_attr *irq_attr) 3676 struct io_apic_irq_attr *irq_attr)
3894{ 3677{
3895 struct irq_desc *desc;
3896 struct irq_cfg *cfg; 3678 struct irq_cfg *cfg;
3897 int node; 3679 int node;
3898 int ioapic, pin; 3680 int ioapic, pin;
@@ -3908,13 +3690,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3908 if (dev) 3690 if (dev)
3909 node = dev_to_node(dev); 3691 node = dev_to_node(dev);
3910 else 3692 else
3911 node = cpu_to_node(boot_cpu_id); 3693 node = cpu_to_node(0);
3912 3694
3913 desc = irq_to_desc_alloc_node(irq, node); 3695 cfg = alloc_irq_and_cfg_at(irq, node);
3914 if (!desc) { 3696 if (!cfg)
3915 printk(KERN_INFO "can not get irq_desc %d\n", irq);
3916 return 0; 3697 return 0;
3917 }
3918 3698
3919 pin = irq_attr->ioapic_pin; 3699 pin = irq_attr->ioapic_pin;
3920 trigger = irq_attr->trigger; 3700 trigger = irq_attr->trigger;
@@ -3924,15 +3704,14 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3924 * IRQs < 16 are already in the irq_2_pin[] map 3704 * IRQs < 16 are already in the irq_2_pin[] map
3925 */ 3705 */
3926 if (irq >= legacy_pic->nr_legacy_irqs) { 3706 if (irq >= legacy_pic->nr_legacy_irqs) {
3927 cfg = desc->chip_data; 3707 if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
3928 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3929 printk(KERN_INFO "can not add pin %d for irq %d\n", 3708 printk(KERN_INFO "can not add pin %d for irq %d\n",
3930 pin, irq); 3709 pin, irq);
3931 return 0; 3710 return 0;
3932 } 3711 }
3933 } 3712 }
3934 3713
3935 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); 3714 setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
3936 3715
3937 return 0; 3716 return 0;
3938} 3717}
@@ -4125,14 +3904,14 @@ void __init setup_ioapic_dest(void)
4125 */ 3904 */
4126 if (desc->status & 3905 if (desc->status &
4127 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 3906 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4128 mask = desc->affinity; 3907 mask = desc->irq_data.affinity;
4129 else 3908 else
4130 mask = apic->target_cpus(); 3909 mask = apic->target_cpus();
4131 3910
4132 if (intr_remapping_enabled) 3911 if (intr_remapping_enabled)
4133 set_ir_ioapic_affinity_irq_desc(desc, mask); 3912 ir_ioapic_set_affinity(&desc->irq_data, mask, false);
4134 else 3913 else
4135 set_ioapic_affinity_irq_desc(desc, mask); 3914 ioapic_set_affinity(&desc->irq_data, mask, false);
4136 } 3915 }
4137 3916
4138} 3917}
@@ -4316,19 +4095,18 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4316void __init pre_init_apic_IRQ0(void) 4095void __init pre_init_apic_IRQ0(void)
4317{ 4096{
4318 struct irq_cfg *cfg; 4097 struct irq_cfg *cfg;
4319 struct irq_desc *desc;
4320 4098
4321 printk(KERN_INFO "Early APIC setup for system timer0\n"); 4099 printk(KERN_INFO "Early APIC setup for system timer0\n");
4322#ifndef CONFIG_SMP 4100#ifndef CONFIG_SMP
4323 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 4101 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
4324#endif 4102#endif
4325 desc = irq_to_desc_alloc_node(0, 0); 4103 /* Make sure the irq descriptor is set up */
4104 cfg = alloc_irq_and_cfg_at(0, 0);
4326 4105
4327 setup_local_APIC(); 4106 setup_local_APIC();
4328 4107
4329 cfg = irq_cfg(0);
4330 add_pin_to_irq_node(cfg, 0, 0, 0); 4108 add_pin_to_irq_node(cfg, 0, 0, 0);
4331 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 4109 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
4332 4110
4333 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); 4111 setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
4334} 4112}
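
Note: the io_apic.c hunks above all follow one conversion pattern: irq_chip callbacks that used to take an unsigned int irq and look up their per-IRQ state via irq_to_desc(irq)->chip_data now take a struct irq_data * and read the same state from data->chip_data, with the chip methods renamed to their irq_* variants (.mask to .irq_mask, .set_affinity to .irq_set_affinity, and so on). Below is a minimal user-space sketch of that calling convention; the demo_* types are simplified stand-ins, not the kernel's real struct irq_cfg, struct irq_data or struct irq_chip, and the code is an illustration of the pattern rather than part of the patch.

#include <stdio.h>

struct demo_cfg {                       /* stand-in for struct irq_cfg */
	unsigned int vector;
};

struct demo_irq_data {                  /* stand-in for struct irq_data */
	unsigned int irq;
	void *chip_data;                /* per-irq state, here a demo_cfg */
};

struct demo_chip {                      /* stand-in for struct irq_chip */
	const char *name;
	void (*irq_mask)(struct demo_irq_data *data);
	void (*irq_unmask)(struct demo_irq_data *data);
};

/* new-style handlers: all per-irq state arrives through the irq_data */
static void demo_mask(struct demo_irq_data *data)
{
	struct demo_cfg *cfg = data->chip_data;

	printf("mask irq %u (vector %u)\n", data->irq, cfg->vector);
}

static void demo_unmask(struct demo_irq_data *data)
{
	struct demo_cfg *cfg = data->chip_data;

	printf("unmask irq %u (vector %u)\n", data->irq, cfg->vector);
}

static struct demo_chip demo_ioapic_chip = {
	.name       = "DEMO-IO-APIC",
	.irq_mask   = demo_mask,
	.irq_unmask = demo_unmask,
};

int main(void)
{
	struct demo_cfg cfg = { .vector = 48 };
	struct demo_irq_data d = { .irq = 9, .chip_data = &cfg };

	demo_ioapic_chip.irq_mask(&d);
	demo_ioapic_chip.irq_unmask(&d);
	return 0;
}
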
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index a43f71cb30f8..c90041ccb742 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -178,7 +178,7 @@ int __init check_nmi_watchdog(void)
178error: 178error:
179 if (nmi_watchdog == NMI_IO_APIC) { 179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259) 180 if (!timer_through_8259)
181 legacy_pic->chip->mask(0); 181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1); 182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 } 183 }
184 184
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 83e9be4778e2..f9e4e6a54073 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -54,6 +54,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
54 */ 54 */
55void __init default_setup_apic_routing(void) 55void __init default_setup_apic_routing(void)
56{ 56{
57
58 enable_IR_x2apic();
59
57#ifdef CONFIG_X86_X2APIC 60#ifdef CONFIG_X86_X2APIC
58 if (x2apic_mode 61 if (x2apic_mode
59#ifdef CONFIG_X86_UV 62#ifdef CONFIG_X86_UV
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ba5f62f45f01..9e093f8fe78c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
148{ 148{
149#ifdef CONFIG_SMP 149#ifdef CONFIG_SMP
150 /* calling is from identify_secondary_cpu() ? */ 150 /* calling is from identify_secondary_cpu() ? */
151 if (c->cpu_index == boot_cpu_id) 151 if (!c->cpu_index)
152 return; 152 return;
153 153
154 /* 154 /*
@@ -253,37 +253,51 @@ static int __cpuinit nearby_node(int apicid)
253#endif 253#endif
254 254
255/* 255/*
256 * Fixup core topology information for AMD multi-node processors. 256 * Fixup core topology information for
257 * Assumption: Number of cores in each internal node is the same. 257 * (1) AMD multi-node processors
258 * Assumption: Number of cores in each internal node is the same.
259 * (2) AMD processors supporting compute units
258 */ 260 */
259#ifdef CONFIG_X86_HT 261#ifdef CONFIG_X86_HT
260static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) 262static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
261{ 263{
262 unsigned long long value; 264 u32 nodes;
263 u32 nodes, cores_per_node; 265 u8 node_id;
264 int cpu = smp_processor_id(); 266 int cpu = smp_processor_id();
265 267
266 if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) 268 /* get information required for multi-node processors */
267 return; 269 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
270 u32 eax, ebx, ecx, edx;
268 271
269 /* fixup topology information only once for a core */ 272 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
270 if (cpu_has(c, X86_FEATURE_AMD_DCM)) 273 nodes = ((ecx >> 8) & 7) + 1;
271 return; 274 node_id = ecx & 7;
272 275
273 rdmsrl(MSR_FAM10H_NODE_ID, value); 276 /* get compute unit information */
277 smp_num_siblings = ((ebx >> 8) & 3) + 1;
278 c->compute_unit_id = ebx & 0xff;
279 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
280 u64 value;
274 281
275 nodes = ((value >> 3) & 7) + 1; 282 rdmsrl(MSR_FAM10H_NODE_ID, value);
276 if (nodes == 1) 283 nodes = ((value >> 3) & 7) + 1;
284 node_id = value & 7;
285 } else
277 return; 286 return;
278 287
279 set_cpu_cap(c, X86_FEATURE_AMD_DCM); 288 /* fixup multi-node processor information */
280 cores_per_node = c->x86_max_cores / nodes; 289 if (nodes > 1) {
290 u32 cores_per_node;
291
292 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
293 cores_per_node = c->x86_max_cores / nodes;
281 294
282 /* store NodeID, use llc_shared_map to store sibling info */ 295 /* store NodeID, use llc_shared_map to store sibling info */
283 per_cpu(cpu_llc_id, cpu) = value & 7; 296 per_cpu(cpu_llc_id, cpu) = node_id;
284 297
285 /* fixup core id to be in range from 0 to (cores_per_node - 1) */ 298 /* core id to be in range from 0 to (cores_per_node - 1) */
286 c->cpu_core_id = c->cpu_core_id % cores_per_node; 299 c->cpu_core_id = c->cpu_core_id % cores_per_node;
300 }
287} 301}
288#endif 302#endif
289 303
@@ -304,9 +318,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
304 c->phys_proc_id = c->initial_apicid >> bits; 318 c->phys_proc_id = c->initial_apicid >> bits;
305 /* use socket ID also for last level cache */ 319 /* use socket ID also for last level cache */
306 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; 320 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
307 /* fixup topology information on multi-node processors */ 321 amd_get_topology(c);
308 if ((c->x86 == 0x10) && (c->x86_model == 9))
309 amd_fixup_dcm(c);
310#endif 322#endif
311} 323}
312 324
@@ -412,6 +424,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
412 set_cpu_cap(c, X86_FEATURE_EXTD_APICID); 424 set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
413 } 425 }
414#endif 426#endif
427
428 /* We need to do the following only once */
429 if (c != &boot_cpu_data)
430 return;
431
432 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
433
434 if (c->x86 > 0x10 ||
435 (c->x86 == 0x10 && c->x86_model >= 0x2)) {
436 u64 val;
437
438 rdmsrl(MSR_K7_HWCR, val);
439 if (!(val & BIT(24)))
440 printk(KERN_WARNING FW_BUG "TSC doesn't count "
441 "with P0 frequency!\n");
442 }
443 }
415} 444}
416 445
417static void __cpuinit init_amd(struct cpuinfo_x86 *c) 446static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -523,7 +552,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
523#endif 552#endif
524 553
525 if (c->extended_cpuid_level >= 0x80000006) { 554 if (c->extended_cpuid_level >= 0x80000006) {
526 if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) 555 if (cpuid_edx(0x80000006) & 0xf000)
527 num_cache_leaves = 4; 556 num_cache_leaves = 4;
528 else 557 else
529 num_cache_leaves = 3; 558 num_cache_leaves = 3;
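
The amd_get_topology() hunk above derives the node and compute-unit layout from CPUID leaf 0x8000001e when TOPOEXT is present, falling back to MSR_FAM10H_NODE_ID otherwise. A minimal user-space sketch of the same field decoding follows; the leaf number and bit masks are taken from the hunk, while the program itself (including its use of GCC's <cpuid.h> helper) is only an assumed illustration, not kernel code:

        /*
         * Decode CPUID 0x8000001e the way amd_get_topology() does:
         * ECX[10:8]+1 = nodes per processor, ECX[2:0] = node id (3-bit
         * mask as in the hunk), EBX[9:8]+1 = siblings per compute unit,
         * EBX[7:0] = compute unit id.  x86-only, requires TOPOEXT.
         */
        #include <stdio.h>
        #include <cpuid.h>

        int main(void)
        {
                unsigned int eax, ebx, ecx, edx;

                if (!__get_cpuid(0x8000001e, &eax, &ebx, &ecx, &edx)) {
                        fprintf(stderr, "CPUID leaf 0x8000001e not supported\n");
                        return 1;
                }

                printf("nodes per processor : %u\n", ((ecx >> 8) & 7) + 1);
                printf("node id             : %u\n", ecx & 7);
                printf("siblings per CU     : %u\n", ((ebx >> 8) & 3) + 1);
                printf("compute unit id     : %u\n", ebx & 0xff);
                return 0;
        }
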
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f2f9ac7da25c..4b68bda30938 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -665,7 +665,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
665 this_cpu->c_early_init(c); 665 this_cpu->c_early_init(c);
666 666
667#ifdef CONFIG_SMP 667#ifdef CONFIG_SMP
668 c->cpu_index = boot_cpu_id; 668 c->cpu_index = 0;
669#endif 669#endif
670 filter_cpuid_features(c, false); 670 filter_cpuid_features(c, false);
671} 671}
@@ -704,16 +704,21 @@ void __init early_cpu_init(void)
704} 704}
705 705
706/* 706/*
707 * The NOPL instruction is supposed to exist on all CPUs with 707 * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
708 * family >= 6; unfortunately, that's not true in practice because 708 * unfortunately, that's not true in practice because of early VIA
709 * of early VIA chips and (more importantly) broken virtualizers that 709 * chips and (more importantly) broken virtualizers that are not easy
710 * are not easy to detect. In the latter case it doesn't even *fail* 710 * to detect. In the latter case it doesn't even *fail* reliably, so
711 * reliably, so probing for it doesn't even work. Disable it completely 711 * probing for it doesn't even work. Disable it completely on 32-bit
712 * unless we can find a reliable way to detect all the broken cases. 712 * unless we can find a reliable way to detect all the broken cases.
713 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
713 */ 714 */
714static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) 715static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
715{ 716{
717#ifdef CONFIG_X86_32
716 clear_cpu_cap(c, X86_FEATURE_NOPL); 718 clear_cpu_cap(c, X86_FEATURE_NOPL);
719#else
720 set_cpu_cap(c, X86_FEATURE_NOPL);
721#endif
717} 722}
718 723
719static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 724static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -1264,13 +1269,6 @@ void __cpuinit cpu_init(void)
1264 clear_all_debug_regs(); 1269 clear_all_debug_regs();
1265 dbg_restore_debug_regs(); 1270 dbg_restore_debug_regs();
1266 1271
1267 /*
1268 * Force FPU initialization:
1269 */
1270 current_thread_info()->status = 0;
1271 clear_used_math();
1272 mxcsr_feature_mask_init();
1273
1274 fpu_init(); 1272 fpu_init();
1275 xsave_init(); 1273 xsave_init();
1276} 1274}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index f668bb1f7d43..e765633f210e 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -32,6 +32,7 @@ struct cpu_dev {
32extern const struct cpu_dev *const __x86_cpu_dev_start[], 32extern const struct cpu_dev *const __x86_cpu_dev_start[],
33 *const __x86_cpu_dev_end[]; 33 *const __x86_cpu_dev_end[];
34 34
35extern void get_cpu_cap(struct cpuinfo_x86 *c);
35extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); 36extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
36extern void get_cpu_cap(struct cpuinfo_x86 *c); 37extern void get_cpu_cap(struct cpuinfo_x86 *c);
37 38
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b4389441efbb..695f17731e23 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -170,7 +170,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
170{ 170{
171#ifdef CONFIG_SMP 171#ifdef CONFIG_SMP
172	/* is this call from identify_secondary_cpu()? */		 172	/* is this call from identify_secondary_cpu()? */
173 if (c->cpu_index == boot_cpu_id) 173 if (!c->cpu_index)
174 return; 174 return;
175 175
176 /* 176 /*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 898c2f4eab88..12cd823c8d03 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -17,7 +17,7 @@
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/amd_nb.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22 22
23#define LVL_1_INST 1 23#define LVL_1_INST 1
@@ -306,7 +306,7 @@ struct _cache_attr {
306 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); 306 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
307}; 307};
308 308
309#ifdef CONFIG_CPU_SUP_AMD 309#ifdef CONFIG_AMD_NB
310 310
311/* 311/*
312 * L3 cache descriptors 312 * L3 cache descriptors
@@ -369,7 +369,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
369 return; 369 return;
370 370
371 /* not in virtualized environments */ 371 /* not in virtualized environments */
372 if (num_k8_northbridges == 0) 372 if (k8_northbridges.num == 0)
373 return; 373 return;
374 374
375 /* 375 /*
@@ -377,7 +377,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
377 * never freed but this is done only on shutdown so it doesn't matter. 377 * never freed but this is done only on shutdown so it doesn't matter.
378 */ 378 */
379 if (!l3_caches) { 379 if (!l3_caches) {
380 int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); 380 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
381 381
382 l3_caches = kzalloc(size, GFP_ATOMIC); 382 l3_caches = kzalloc(size, GFP_ATOMIC);
383 if (!l3_caches) 383 if (!l3_caches)
@@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
556static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 556static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
557 show_cache_disable_1, store_cache_disable_1); 557 show_cache_disable_1, store_cache_disable_1);
558 558
559#else /* CONFIG_CPU_SUP_AMD */ 559#else /* CONFIG_AMD_NB */
560static void __cpuinit 560static void __cpuinit
561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) 561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
562{ 562{
563}; 563};
564#endif /* CONFIG_CPU_SUP_AMD */ 564#endif /* CONFIG_AMD_NB */
565 565
566static int 566static int
567__cpuinit cpuid4_cache_lookup_regs(int index, 567__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = {
1000 1000
1001static struct attribute *default_l3_attrs[] = { 1001static struct attribute *default_l3_attrs[] = {
1002 DEFAULT_SYSFS_CACHE_ATTRS, 1002 DEFAULT_SYSFS_CACHE_ATTRS,
1003#ifdef CONFIG_CPU_SUP_AMD 1003#ifdef CONFIG_AMD_NB
1004 &cache_disable_0.attr, 1004 &cache_disable_0.attr,
1005 &cache_disable_1.attr, 1005 &cache_disable_1.attr,
1006#endif 1006#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5e975298fa81..80c482382d5c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -131,7 +131,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
131 u32 low = 0, high = 0, address = 0; 131 u32 low = 0, high = 0, address = 0;
132 unsigned int bank, block; 132 unsigned int bank, block;
133 struct thresh_restart tr; 133 struct thresh_restart tr;
134 u8 lvt_off; 134 int lvt_off = -1;
135 u8 offset;
135 136
136 for (bank = 0; bank < NR_BANKS; ++bank) { 137 for (bank = 0; bank < NR_BANKS; ++bank) {
137 for (block = 0; block < NR_BLOCKS; ++block) { 138 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -141,6 +142,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
141 address = (low & MASK_BLKPTR_LO) >> 21; 142 address = (low & MASK_BLKPTR_LO) >> 21;
142 if (!address) 143 if (!address)
143 break; 144 break;
145
144 address += MCG_XBLK_ADDR; 146 address += MCG_XBLK_ADDR;
145 } else 147 } else
146 ++address; 148 ++address;
@@ -148,12 +150,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
148 if (rdmsr_safe(address, &low, &high)) 150 if (rdmsr_safe(address, &low, &high))
149 break; 151 break;
150 152
151 if (!(high & MASK_VALID_HI)) { 153 if (!(high & MASK_VALID_HI))
152 if (block) 154 continue;
153 continue;
154 else
155 break;
156 }
157 155
158 if (!(high & MASK_CNTP_HI) || 156 if (!(high & MASK_CNTP_HI) ||
159 (high & MASK_LOCKED_HI)) 157 (high & MASK_LOCKED_HI))
@@ -165,8 +163,28 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
165 if (shared_bank[bank] && c->cpu_core_id) 163 if (shared_bank[bank] && c->cpu_core_id)
166 break; 164 break;
167#endif 165#endif
168 lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, 166 offset = (high & MASK_LVTOFF_HI) >> 20;
169 APIC_EILVT_MSG_FIX, 0); 167 if (lvt_off < 0) {
168 if (setup_APIC_eilvt(offset,
169 THRESHOLD_APIC_VECTOR,
170 APIC_EILVT_MSG_FIX, 0)) {
171 pr_err(FW_BUG "cpu %d, failed to "
172 "setup threshold interrupt "
173 "for bank %d, block %d "
174 "(MSR%08X=0x%x%08x)",
175 smp_processor_id(), bank, block,
176 address, high, low);
177 continue;
178 }
179 lvt_off = offset;
180 } else if (lvt_off != offset) {
181 pr_err(FW_BUG "cpu %d, invalid threshold "
182 "interrupt offset %d for bank %d,"
183 "block %d (MSR%08X=0x%x%08x)",
184 smp_processor_id(), lvt_off, bank,
185 block, address, high, low);
186 continue;
187 }
170 188
171 high &= ~MASK_LVTOFF_HI; 189 high &= ~MASK_LVTOFF_HI;
172 high |= lvt_off << 20; 190 high |= lvt_off << 20;
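
The mce_amd_feature_init() hunk above reads each block's LVT offset from bits 23:20 of the threshold MSR, programs the APIC EILVT entry once, and treats any later block that reports a different offset as a firmware bug and skips it. A stand-alone sketch of that first-offset-wins rule, assuming hypothetical helper names (reserve_lvt_offset, claim_block_offset) in place of the kernel symbols:

        #include <stdio.h>

        static int current_lvt_off = -1;

        /* stand-in for setup_APIC_eilvt(); pretend the APIC accepts the offset */
        static int reserve_lvt_offset(unsigned int offset)
        {
                (void)offset;
                return 0;
        }

        static int claim_block_offset(unsigned int bank, unsigned int block,
                                      unsigned int offset)
        {
                if (current_lvt_off < 0) {
                        if (reserve_lvt_offset(offset)) {
                                fprintf(stderr, "bank %u, block %u: EILVT setup failed\n",
                                        bank, block);
                                return -1;
                        }
                        current_lvt_off = (int)offset;      /* first valid offset wins */
                        return 0;
                }

                if (current_lvt_off != (int)offset) {
                        fprintf(stderr, "bank %u, block %u: offset %u != %d (firmware bug)\n",
                                bank, block, offset, current_lvt_off);
                        return -1;                          /* skip the inconsistent block */
                }
                return 0;
        }

        int main(void)
        {
                claim_block_offset(0, 0, 1);    /* first block programs offset 1 */
                claim_block_offset(4, 0, 1);    /* consistent: accepted */
                claim_block_offset(4, 1, 2);    /* inconsistent: flagged and skipped */
                return 0;
        }
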
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d9368eeda309..4b683267eca5 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -216,7 +216,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
216 err = sysfs_add_file_to_group(&sys_dev->kobj, 216 err = sysfs_add_file_to_group(&sys_dev->kobj,
217 &attr_core_power_limit_count.attr, 217 &attr_core_power_limit_count.attr,
218 thermal_attr_group.name); 218 thermal_attr_group.name);
219 if (cpu_has(c, X86_FEATURE_PTS)) 219 if (cpu_has(c, X86_FEATURE_PTS)) {
220 err = sysfs_add_file_to_group(&sys_dev->kobj, 220 err = sysfs_add_file_to_group(&sys_dev->kobj,
221 &attr_package_throttle_count.attr, 221 &attr_package_throttle_count.attr,
222 thermal_attr_group.name); 222 thermal_attr_group.name);
@@ -224,6 +224,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
224 err = sysfs_add_file_to_group(&sys_dev->kobj, 224 err = sysfs_add_file_to_group(&sys_dev->kobj,
225 &attr_package_power_limit_count.attr, 225 &attr_package_power_limit_count.attr,
226 thermal_attr_group.name); 226 thermal_attr_group.name);
227 }
227 228
228 return err; 229 return err;
229} 230}
@@ -349,7 +350,7 @@ static void intel_thermal_interrupt(void)
349 350
350static void unexpected_thermal_interrupt(void) 351static void unexpected_thermal_interrupt(void)
351{ 352{
352 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 353 printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
353 smp_processor_id()); 354 smp_processor_id());
354 add_taint(TAINT_MACHINE_CHECK); 355 add_taint(TAINT_MACHINE_CHECK);
355} 356}
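
The braces added around the X86_FEATURE_PTS branch above matter because, without them, only the first statement after the if is conditional, however the code is indented. A toy example of that pattern, unrelated to the kernel symbols and purely illustrative:

        #include <stdio.h>

        int main(void)
        {
                int has_pts = 0;        /* pretend X86_FEATURE_PTS is absent */
                int added = 0;

                if (has_pts)
                        added += 1;     /* guarded by the if */
                        added += 1;     /* NOT guarded: always runs */

                printf("attributes added: %d (expected 0)\n", added);
                return 0;
        }
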
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index c5f59d071425..ac140c7be396 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void)
827 827
828 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 828 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
829 return 0; 829 return 0;
830 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 830 if (boot_cpu_data.x86 < 0xf)
831 return 0; 831 return 0;
832 /* In case some hypervisor doesn't pass SYSCFG through: */ 832 /* In case some hypervisor doesn't pass SYSCFG through: */
833 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) 833 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7d28d7d03885..9f27228ceffd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -64,18 +64,59 @@ static inline void k8_check_syscfg_dram_mod_en(void)
64 } 64 }
65} 65}
66 66
67/* Get the size of contiguous MTRR range */
68static u64 get_mtrr_size(u64 mask)
69{
70 u64 size;
71
72 mask >>= PAGE_SHIFT;
73 mask |= size_or_mask;
74 size = -mask;
75 size <<= PAGE_SHIFT;
76 return size;
77}
78
67/* 79/*
68 * Returns the effective MTRR type for the region 80 * Check and return the effective type for MTRR-MTRR type overlap.
69 * Error returns: 81 * Returns 1 if the effective type is UNCACHEABLE, else returns 0
70 * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
71 * - 0xFF - when MTRR is not enabled
72 */ 82 */
73u8 mtrr_type_lookup(u64 start, u64 end) 83static int check_type_overlap(u8 *prev, u8 *curr)
84{
85 if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
86 *prev = MTRR_TYPE_UNCACHABLE;
87 *curr = MTRR_TYPE_UNCACHABLE;
88 return 1;
89 }
90
91 if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
92 (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) {
93 *prev = MTRR_TYPE_WRTHROUGH;
94 *curr = MTRR_TYPE_WRTHROUGH;
95 }
96
97 if (*prev != *curr) {
98 *prev = MTRR_TYPE_UNCACHABLE;
99 *curr = MTRR_TYPE_UNCACHABLE;
100 return 1;
101 }
102
103 return 0;
104}
105
106/*
107 * Error/Semi-error returns:
108 * 0xFF - when MTRR is not enabled
109 * *repeat == 1 implies [start:end] spanned across MTRR range and type returned
110 * corresponds only to [start:*partial_end].
111 * Caller has to lookup again for [*partial_end:end].
112 */
113static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
74{ 114{
75 int i; 115 int i;
76 u64 base, mask; 116 u64 base, mask;
77 u8 prev_match, curr_match; 117 u8 prev_match, curr_match;
78 118
119 *repeat = 0;
79 if (!mtrr_state_set) 120 if (!mtrr_state_set)
80 return 0xFF; 121 return 0xFF;
81 122
@@ -126,8 +167,34 @@ u8 mtrr_type_lookup(u64 start, u64 end)
126 167
127 start_state = ((start & mask) == (base & mask)); 168 start_state = ((start & mask) == (base & mask));
128 end_state = ((end & mask) == (base & mask)); 169 end_state = ((end & mask) == (base & mask));
129 if (start_state != end_state) 170
130 return 0xFE; 171 if (start_state != end_state) {
172 /*
173 * We have start:end spanning across an MTRR.
174 * We split the region into
175 * either
176 * (start:mtrr_end) (mtrr_end:end)
177 * or
178 * (start:mtrr_start) (mtrr_start:end)
179 * depending on kind of overlap.
180 * Return the type for first region and a pointer to
181 * the start of second region so that caller will
182 * lookup again on the second region.
183 * Note: This way we handle multiple overlaps as well.
184 */
185 if (start_state)
186 *partial_end = base + get_mtrr_size(mask);
187 else
188 *partial_end = base;
189
190 if (unlikely(*partial_end <= start)) {
191 WARN_ON(1);
192 *partial_end = start + PAGE_SIZE;
193 }
194
195 end = *partial_end - 1; /* end is inclusive */
196 *repeat = 1;
197 }
131 198
132 if ((start & mask) != (base & mask)) 199 if ((start & mask) != (base & mask))
133 continue; 200 continue;
@@ -138,21 +205,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
138 continue; 205 continue;
139 } 206 }
140 207
141 if (prev_match == MTRR_TYPE_UNCACHABLE || 208 if (check_type_overlap(&prev_match, &curr_match))
142 curr_match == MTRR_TYPE_UNCACHABLE) { 209 return curr_match;
143 return MTRR_TYPE_UNCACHABLE;
144 }
145
146 if ((prev_match == MTRR_TYPE_WRBACK &&
147 curr_match == MTRR_TYPE_WRTHROUGH) ||
148 (prev_match == MTRR_TYPE_WRTHROUGH &&
149 curr_match == MTRR_TYPE_WRBACK)) {
150 prev_match = MTRR_TYPE_WRTHROUGH;
151 curr_match = MTRR_TYPE_WRTHROUGH;
152 }
153
154 if (prev_match != curr_match)
155 return MTRR_TYPE_UNCACHABLE;
156 } 210 }
157 211
158 if (mtrr_tom2) { 212 if (mtrr_tom2) {
@@ -166,6 +220,36 @@ u8 mtrr_type_lookup(u64 start, u64 end)
166 return mtrr_state.def_type; 220 return mtrr_state.def_type;
167} 221}
168 222
223/*
224 * Returns the effective MTRR type for the region
225 * Error return:
226 * 0xFF - when MTRR is not enabled
227 */
228u8 mtrr_type_lookup(u64 start, u64 end)
229{
230 u8 type, prev_type;
231 int repeat;
232 u64 partial_end;
233
234 type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
235
236 /*
237 * Common path is with repeat = 0.
238 * However, we can have cases where [start:end] spans across some
239 * MTRR range. Do repeated lookups for that case here.
240 */
241 while (repeat) {
242 prev_type = type;
243 start = partial_end;
244 type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
245
246 if (check_type_overlap(&prev_type, &type))
247 return type;
248 }
249
250 return type;
251}
252
169/* Get the MSR pair relating to a var range */ 253/* Get the MSR pair relating to a var range */
170static void 254static void
171get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) 255get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
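
check_type_overlap() above encodes the MTRR merge rule: UC dominates, WB and WT merge to WT, and any other mismatch degrades to UC, after which the lookup can stop. A stand-alone sketch of that rule with a tiny test harness; the MTRR_TYPE_* values follow the usual x86 encoding, and the harness itself is an assumption for illustration:

        #include <stdio.h>

        enum {
                MTRR_TYPE_UNCACHABLE = 0,
                MTRR_TYPE_WRTHROUGH  = 4,
                MTRR_TYPE_WRBACK     = 6,
        };

        /* returns 1 when the merged type is final (UC), 0 otherwise */
        static int check_type_overlap(unsigned char *prev, unsigned char *curr)
        {
                if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
                        *prev = *curr = MTRR_TYPE_UNCACHABLE;
                        return 1;
                }

                if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
                    (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK))
                        *prev = *curr = MTRR_TYPE_WRTHROUGH;

                if (*prev != *curr) {
                        *prev = *curr = MTRR_TYPE_UNCACHABLE;
                        return 1;
                }

                return 0;
        }

        int main(void)
        {
                unsigned char a = MTRR_TYPE_WRBACK, b = MTRR_TYPE_WRTHROUGH;

                check_type_overlap(&a, &b);
                printf("WB + WT -> %u (expect %u)\n", a, MTRR_TYPE_WRTHROUGH);
                return 0;
        }
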
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 03a5b0385ad6..fe73c1844a9a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
531/* 531/*
532 * Setup the hardware configuration for a given attr_type 532 * Setup the hardware configuration for a given attr_type
533 */ 533 */
534static int __hw_perf_event_init(struct perf_event *event) 534static int __x86_pmu_event_init(struct perf_event *event)
535{ 535{
536 int err; 536 int err;
537 537
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
584 } 584 }
585} 585}
586 586
587void hw_perf_disable(void) 587static void x86_pmu_disable(struct pmu *pmu)
588{ 588{
589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
590 590
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
619 } 619 }
620} 620}
621 621
622static const struct pmu pmu; 622static struct pmu pmu;
623 623
624static inline int is_x86_event(struct perf_event *event) 624static inline int is_x86_event(struct perf_event *event)
625{ 625{
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
801 hwc->last_tag == cpuc->tags[i]; 801 hwc->last_tag == cpuc->tags[i];
802} 802}
803 803
804static int x86_pmu_start(struct perf_event *event); 804static void x86_pmu_start(struct perf_event *event, int flags);
805static void x86_pmu_stop(struct perf_event *event); 805static void x86_pmu_stop(struct perf_event *event, int flags);
806 806
807void hw_perf_enable(void) 807static void x86_pmu_enable(struct pmu *pmu)
808{ 808{
809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
810 struct perf_event *event; 810 struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
840 match_prev_assignment(hwc, cpuc, i)) 840 match_prev_assignment(hwc, cpuc, i))
841 continue; 841 continue;
842 842
843 x86_pmu_stop(event); 843 /*
844 * Ensure we don't accidentally enable a stopped
845 * counter simply because we rescheduled.
846 */
847 if (hwc->state & PERF_HES_STOPPED)
848 hwc->state |= PERF_HES_ARCH;
849
850 x86_pmu_stop(event, PERF_EF_UPDATE);
844 } 851 }
845 852
846 for (i = 0; i < cpuc->n_events; i++) { 853 for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
852 else if (i < n_running) 859 else if (i < n_running)
853 continue; 860 continue;
854 861
855 x86_pmu_start(event); 862 if (hwc->state & PERF_HES_ARCH)
863 continue;
864
865 x86_pmu_start(event, PERF_EF_RELOAD);
856 } 866 }
857 cpuc->n_added = 0; 867 cpuc->n_added = 0;
858 perf_events_lapic_init(); 868 perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
953} 963}
954 964
955/* 965/*
956 * activate a single event 966 * Add a single event to the PMU.
957 * 967 *
958 * The event is added to the group of enabled events 968 * The event is added to the group of enabled events
959 * but only if it can be scheduled with existing events. 969 * but only if it can be scheduled with existing events.
960 *
961 * Called with PMU disabled. If successful and return value 1,
962 * then guaranteed to call perf_enable() and hw_perf_enable()
963 */ 970 */
964static int x86_pmu_enable(struct perf_event *event) 971static int x86_pmu_add(struct perf_event *event, int flags)
965{ 972{
966 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 973 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
967 struct hw_perf_event *hwc; 974 struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
970 977
971 hwc = &event->hw; 978 hwc = &event->hw;
972 979
980 perf_pmu_disable(event->pmu);
973 n0 = cpuc->n_events; 981 n0 = cpuc->n_events;
974 n = collect_events(cpuc, event, false); 982 ret = n = collect_events(cpuc, event, false);
975 if (n < 0) 983 if (ret < 0)
976 return n; 984 goto out;
985
986 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
987 if (!(flags & PERF_EF_START))
988 hwc->state |= PERF_HES_ARCH;
977 989
978 /* 990 /*
979 * If group events scheduling transaction was started, 991 * If group events scheduling transaction was started,
981 * skip the schedulability test here, it will be performed 993 * skip the schedulability test here, it will be performed
981 * at commit time(->commit_txn) as a whole 993 * at commit time (->commit_txn) as a whole
982 */ 994 */
983 if (cpuc->group_flag & PERF_EVENT_TXN) 995 if (cpuc->group_flag & PERF_EVENT_TXN)
984 goto out; 996 goto done_collect;
985 997
986 ret = x86_pmu.schedule_events(cpuc, n, assign); 998 ret = x86_pmu.schedule_events(cpuc, n, assign);
987 if (ret) 999 if (ret)
988 return ret; 1000 goto out;
989 /* 1001 /*
990 * copy new assignment, now we know it is possible 1002 * copy new assignment, now we know it is possible
991 * will be used by hw_perf_enable() 1003 * will be used by hw_perf_enable()
992 */ 1004 */
993 memcpy(cpuc->assign, assign, n*sizeof(int)); 1005 memcpy(cpuc->assign, assign, n*sizeof(int));
994 1006
995out: 1007done_collect:
996 cpuc->n_events = n; 1008 cpuc->n_events = n;
997 cpuc->n_added += n - n0; 1009 cpuc->n_added += n - n0;
998 cpuc->n_txn += n - n0; 1010 cpuc->n_txn += n - n0;
999 1011
1000 return 0; 1012 ret = 0;
1013out:
1014 perf_pmu_enable(event->pmu);
1015 return ret;
1001} 1016}
1002 1017
1003static int x86_pmu_start(struct perf_event *event) 1018static void x86_pmu_start(struct perf_event *event, int flags)
1004{ 1019{
1005 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1020 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1006 int idx = event->hw.idx; 1021 int idx = event->hw.idx;
1007 1022
1008 if (idx == -1) 1023 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1009 return -EAGAIN; 1024 return;
1025
1026 if (WARN_ON_ONCE(idx == -1))
1027 return;
1028
1029 if (flags & PERF_EF_RELOAD) {
1030 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1031 x86_perf_event_set_period(event);
1032 }
1033
1034 event->hw.state = 0;
1010 1035
1011 x86_perf_event_set_period(event);
1012 cpuc->events[idx] = event; 1036 cpuc->events[idx] = event;
1013 __set_bit(idx, cpuc->active_mask); 1037 __set_bit(idx, cpuc->active_mask);
1014 __set_bit(idx, cpuc->running); 1038 __set_bit(idx, cpuc->running);
1015 x86_pmu.enable(event); 1039 x86_pmu.enable(event);
1016 perf_event_update_userpage(event); 1040 perf_event_update_userpage(event);
1017
1018 return 0;
1019}
1020
1021static void x86_pmu_unthrottle(struct perf_event *event)
1022{
1023 int ret = x86_pmu_start(event);
1024 WARN_ON_ONCE(ret);
1025} 1041}
1026 1042
1027void perf_event_print_debug(void) 1043void perf_event_print_debug(void)
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
1078 local_irq_restore(flags); 1094 local_irq_restore(flags);
1079} 1095}
1080 1096
1081static void x86_pmu_stop(struct perf_event *event) 1097static void x86_pmu_stop(struct perf_event *event, int flags)
1082{ 1098{
1083 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1099 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1084 struct hw_perf_event *hwc = &event->hw; 1100 struct hw_perf_event *hwc = &event->hw;
1085 int idx = hwc->idx;
1086 1101
1087 if (!__test_and_clear_bit(idx, cpuc->active_mask)) 1102 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1088 return; 1103 x86_pmu.disable(event);
1089 1104 cpuc->events[hwc->idx] = NULL;
1090 x86_pmu.disable(event); 1105 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1091 1106 hwc->state |= PERF_HES_STOPPED;
1092 /* 1107 }
1093	 * Drain the remaining delta count out of an event
1094 * that we are disabling:
1095 */
1096 x86_perf_event_update(event);
1097 1108
1098 cpuc->events[idx] = NULL; 1109 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1110 /*
1111		 * Drain the remaining delta count out of an event
1112 * that we are disabling:
1113 */
1114 x86_perf_event_update(event);
1115 hwc->state |= PERF_HES_UPTODATE;
1116 }
1099} 1117}
1100 1118
1101static void x86_pmu_disable(struct perf_event *event) 1119static void x86_pmu_del(struct perf_event *event, int flags)
1102{ 1120{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1121 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 int i; 1122 int i;
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
1111 if (cpuc->group_flag & PERF_EVENT_TXN) 1129 if (cpuc->group_flag & PERF_EVENT_TXN)
1112 return; 1130 return;
1113 1131
1114 x86_pmu_stop(event); 1132 x86_pmu_stop(event, PERF_EF_UPDATE);
1115 1133
1116 for (i = 0; i < cpuc->n_events; i++) { 1134 for (i = 0; i < cpuc->n_events; i++) {
1117 if (event == cpuc->event_list[i]) { 1135 if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1134 struct perf_sample_data data; 1152 struct perf_sample_data data;
1135 struct cpu_hw_events *cpuc; 1153 struct cpu_hw_events *cpuc;
1136 struct perf_event *event; 1154 struct perf_event *event;
1137 struct hw_perf_event *hwc;
1138 int idx, handled = 0; 1155 int idx, handled = 0;
1139 u64 val; 1156 u64 val;
1140 1157
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1155 } 1172 }
1156 1173
1157 event = cpuc->events[idx]; 1174 event = cpuc->events[idx];
1158 hwc = &event->hw;
1159 1175
1160 val = x86_perf_event_update(event); 1176 val = x86_perf_event_update(event);
1161 if (val & (1ULL << (x86_pmu.cntval_bits - 1))) 1177 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1171 continue; 1187 continue;
1172 1188
1173 if (perf_event_overflow(event, 1, &data, regs)) 1189 if (perf_event_overflow(event, 1, &data, regs))
1174 x86_pmu_stop(event); 1190 x86_pmu_stop(event, 0);
1175 } 1191 }
1176 1192
1177 if (handled) 1193 if (handled)
@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1180 return handled; 1196 return handled;
1181} 1197}
1182 1198
1183void smp_perf_pending_interrupt(struct pt_regs *regs)
1184{
1185 irq_enter();
1186 ack_APIC_irq();
1187 inc_irq_stat(apic_pending_irqs);
1188 perf_event_do_pending();
1189 irq_exit();
1190}
1191
1192void set_perf_event_pending(void)
1193{
1194#ifdef CONFIG_X86_LOCAL_APIC
1195 if (!x86_pmu.apic || !x86_pmu_initialized())
1196 return;
1197
1198 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1199#endif
1200}
1201
1202void perf_events_lapic_init(void) 1199void perf_events_lapic_init(void)
1203{ 1200{
1204 if (!x86_pmu.apic || !x86_pmu_initialized()) 1201 if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void)
1388 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1385 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1389 } 1386 }
1390 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1387 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1391 perf_max_events = x86_pmu.num_counters;
1392 1388
1393 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1389 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1394 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1390 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void)
1424 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1420 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1425 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1421 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1426 1422
1423 perf_pmu_register(&pmu);
1427 perf_cpu_notifier(x86_pmu_notifier); 1424 perf_cpu_notifier(x86_pmu_notifier);
1428} 1425}
1429 1426
@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1437 * Set the flag to make pmu::enable() not perform the 1434 * Set the flag to make pmu::enable() not perform the
1438 * schedulability test, it will be performed at commit time 1435 * schedulability test, it will be performed at commit time
1439 */ 1436 */
1440static void x86_pmu_start_txn(const struct pmu *pmu) 1437static void x86_pmu_start_txn(struct pmu *pmu)
1441{ 1438{
1442 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1439 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1443 1440
1441 perf_pmu_disable(pmu);
1444 cpuc->group_flag |= PERF_EVENT_TXN; 1442 cpuc->group_flag |= PERF_EVENT_TXN;
1445 cpuc->n_txn = 0; 1443 cpuc->n_txn = 0;
1446} 1444}
@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1450 * Clear the flag and pmu::enable() will perform the 1448 * Clear the flag and pmu::enable() will perform the
1451 * schedulability test. 1449 * schedulability test.
1452 */ 1450 */
1453static void x86_pmu_cancel_txn(const struct pmu *pmu) 1451static void x86_pmu_cancel_txn(struct pmu *pmu)
1454{ 1452{
1455 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1453 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1456 1454
@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1460 */ 1458 */
1461 cpuc->n_added -= cpuc->n_txn; 1459 cpuc->n_added -= cpuc->n_txn;
1462 cpuc->n_events -= cpuc->n_txn; 1460 cpuc->n_events -= cpuc->n_txn;
1461 perf_pmu_enable(pmu);
1463} 1462}
1464 1463
1465/* 1464/*
@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1467 * Perform the group schedulability test as a whole 1466 * Perform the group schedulability test as a whole
1468 * Return 0 if success 1467 * Return 0 if success
1469 */ 1468 */
1470static int x86_pmu_commit_txn(const struct pmu *pmu) 1469static int x86_pmu_commit_txn(struct pmu *pmu)
1471{ 1470{
1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1473 int assign[X86_PMC_IDX_MAX]; 1472 int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1489 memcpy(cpuc->assign, assign, n*sizeof(int)); 1488 memcpy(cpuc->assign, assign, n*sizeof(int));
1490 1489
1491 cpuc->group_flag &= ~PERF_EVENT_TXN; 1490 cpuc->group_flag &= ~PERF_EVENT_TXN;
1492 1491 perf_pmu_enable(pmu);
1493 return 0; 1492 return 0;
1494} 1493}
1495 1494
1496static const struct pmu pmu = {
1497 .enable = x86_pmu_enable,
1498 .disable = x86_pmu_disable,
1499 .start = x86_pmu_start,
1500 .stop = x86_pmu_stop,
1501 .read = x86_pmu_read,
1502 .unthrottle = x86_pmu_unthrottle,
1503 .start_txn = x86_pmu_start_txn,
1504 .cancel_txn = x86_pmu_cancel_txn,
1505 .commit_txn = x86_pmu_commit_txn,
1506};
1507
1508/* 1495/*
1509 * validate that we can schedule this event 1496 * validate that we can schedule this event
1510 */ 1497 */
@@ -1579,12 +1566,22 @@ out:
1579 return ret; 1566 return ret;
1580} 1567}
1581 1568
1582const struct pmu *hw_perf_event_init(struct perf_event *event) 1569int x86_pmu_event_init(struct perf_event *event)
1583{ 1570{
1584 const struct pmu *tmp; 1571 struct pmu *tmp;
1585 int err; 1572 int err;
1586 1573
1587 err = __hw_perf_event_init(event); 1574 switch (event->attr.type) {
1575 case PERF_TYPE_RAW:
1576 case PERF_TYPE_HARDWARE:
1577 case PERF_TYPE_HW_CACHE:
1578 break;
1579
1580 default:
1581 return -ENOENT;
1582 }
1583
1584 err = __x86_pmu_event_init(event);
1588 if (!err) { 1585 if (!err) {
1589 /* 1586 /*
1590 * we temporarily connect event to its pmu 1587 * we temporarily connect event to its pmu
@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1604 if (err) { 1601 if (err) {
1605 if (event->destroy) 1602 if (event->destroy)
1606 event->destroy(event); 1603 event->destroy(event);
1607 return ERR_PTR(err);
1608 } 1604 }
1609 1605
1610 return &pmu; 1606 return err;
1611} 1607}
1612 1608
1613/* 1609static struct pmu pmu = {
1614 * callchain support 1610 .pmu_enable = x86_pmu_enable,
1615 */ 1611 .pmu_disable = x86_pmu_disable,
1616 1612
1617static inline 1613 .event_init = x86_pmu_event_init,
1618void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1619{
1620 if (entry->nr < PERF_MAX_STACK_DEPTH)
1621 entry->ip[entry->nr++] = ip;
1622}
1623 1614
1624static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1615 .add = x86_pmu_add,
1625static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1616 .del = x86_pmu_del,
1617 .start = x86_pmu_start,
1618 .stop = x86_pmu_stop,
1619 .read = x86_pmu_read,
1626 1620
1621 .start_txn = x86_pmu_start_txn,
1622 .cancel_txn = x86_pmu_cancel_txn,
1623 .commit_txn = x86_pmu_commit_txn,
1624};
1625
1626/*
1627 * callchain support
1628 */
1627 1629
1628static void 1630static void
1629backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1631backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1645{ 1647{
1646 struct perf_callchain_entry *entry = data; 1648 struct perf_callchain_entry *entry = data;
1647 1649
1648 callchain_store(entry, addr); 1650 perf_callchain_store(entry, addr);
1649} 1651}
1650 1652
1651static const struct stacktrace_ops backtrace_ops = { 1653static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
1656 .walk_stack = print_context_stack_bp, 1658 .walk_stack = print_context_stack_bp,
1657}; 1659};
1658 1660
1659static void 1661void
1660perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1662perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1661{ 1663{
1662 callchain_store(entry, PERF_CONTEXT_KERNEL); 1664 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1663 callchain_store(entry, regs->ip); 1665 /* TODO: We don't support guest os callchain now */
1666 return;
1667 }
1668
1669 perf_callchain_store(entry, regs->ip);
1664 1670
1665 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1671 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1666} 1672}
@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1689 if (fp < compat_ptr(regs->sp)) 1695 if (fp < compat_ptr(regs->sp))
1690 break; 1696 break;
1691 1697
1692 callchain_store(entry, frame.return_address); 1698 perf_callchain_store(entry, frame.return_address);
1693 fp = compat_ptr(frame.next_frame); 1699 fp = compat_ptr(frame.next_frame);
1694 } 1700 }
1695 return 1; 1701 return 1;
@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1702} 1708}
1703#endif 1709#endif
1704 1710
1705static void 1711void
1706perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1712perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1707{ 1713{
1708 struct stack_frame frame; 1714 struct stack_frame frame;
1709 const void __user *fp; 1715 const void __user *fp;
1710 1716
1711 if (!user_mode(regs)) 1717 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 regs = task_pt_regs(current); 1718 /* TODO: We don't support guest os callchain now */
1719 return;
1720 }
1713 1721
1714 fp = (void __user *)regs->bp; 1722 fp = (void __user *)regs->bp;
1715 1723
1716 callchain_store(entry, PERF_CONTEXT_USER); 1724 perf_callchain_store(entry, regs->ip);
1717 callchain_store(entry, regs->ip);
1718 1725
1719 if (perf_callchain_user32(regs, entry)) 1726 if (perf_callchain_user32(regs, entry))
1720 return; 1727 return;
@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731 if ((unsigned long)fp < regs->sp) 1738 if ((unsigned long)fp < regs->sp)
1732 break; 1739 break;
1733 1740
1734 callchain_store(entry, frame.return_address); 1741 perf_callchain_store(entry, frame.return_address);
1735 fp = frame.next_frame; 1742 fp = frame.next_frame;
1736 } 1743 }
1737} 1744}
1738 1745
1739static void
1740perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1741{
1742 int is_user;
1743
1744 if (!regs)
1745 return;
1746
1747 is_user = user_mode(regs);
1748
1749 if (is_user && current->state != TASK_RUNNING)
1750 return;
1751
1752 if (!is_user)
1753 perf_callchain_kernel(regs, entry);
1754
1755 if (current->mm)
1756 perf_callchain_user(regs, entry);
1757}
1758
1759struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1760{
1761 struct perf_callchain_entry *entry;
1762
1763 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1764 /* TODO: We don't support guest os callchain now */
1765 return NULL;
1766 }
1767
1768 if (in_nmi())
1769 entry = &__get_cpu_var(pmc_nmi_entry);
1770 else
1771 entry = &__get_cpu_var(pmc_irq_entry);
1772
1773 entry->nr = 0;
1774
1775 perf_do_callchain(regs, entry);
1776
1777 return entry;
1778}
1779
1780unsigned long perf_instruction_pointer(struct pt_regs *regs) 1746unsigned long perf_instruction_pointer(struct pt_regs *regs)
1781{ 1747{
1782 unsigned long ip; 1748 unsigned long ip;
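
The perf_event.c changes above move the x86 PMU to the add/del/start/stop interface and track per-event state with PERF_HES_STOPPED, PERF_HES_UPTODATE and PERF_HES_ARCH. A toy model of that bookkeeping is sketched below; the flag names and the start/stop control flow mirror the hunks, while the numeric values, the event structure and the counter arithmetic are assumptions for illustration only:

        #include <stdio.h>

        #define PERF_HES_STOPPED        0x01
        #define PERF_HES_UPTODATE       0x02
        #define PERF_HES_ARCH           0x04

        #define PERF_EF_START           0x01
        #define PERF_EF_RELOAD          0x02
        #define PERF_EF_UPDATE          0x04

        struct toy_event {
                unsigned int state;
                long long count;        /* accumulated value */
                long long hw;           /* pretend hardware counter */
        };

        static void toy_add(struct toy_event *e, int flags)
        {
                e->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
                if (!(flags & PERF_EF_START))
                        e->state |= PERF_HES_ARCH;      /* pmu_enable() starts it later */
        }

        static void toy_start(struct toy_event *e, int flags)
        {
                if (!(e->state & PERF_HES_STOPPED))
                        return;                         /* already running */
                if (flags & PERF_EF_RELOAD)
                        e->hw = 0;                      /* x86_perf_event_set_period() */
                e->state = 0;
        }

        static void toy_stop(struct toy_event *e, int flags)
        {
                if (!(e->state & PERF_HES_STOPPED)) {
                        /* x86_pmu.disable(event) would go here */
                        e->state |= PERF_HES_STOPPED;
                }
                if ((flags & PERF_EF_UPDATE) && !(e->state & PERF_HES_UPTODATE)) {
                        e->count += e->hw;              /* x86_perf_event_update() */
                        e->hw = 0;
                        e->state |= PERF_HES_UPTODATE;
                }
        }

        int main(void)
        {
                struct toy_event e = { 0, 0, 0 };

                toy_add(&e, PERF_EF_START);
                toy_start(&e, PERF_EF_RELOAD);
                e.hw = 42;                              /* counter ticked */
                toy_stop(&e, PERF_EF_UPDATE);
                printf("count = %lld, state = %#x\n", e.count, e.state);
                return 0;
        }
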
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c2897b7b4a3b..46d58448c3af 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
52 [ C(DTLB) ] = { 52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = { 53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ 55 [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
56 }, 56 },
57 [ C(OP_WRITE) ] = { 57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0, 58 [ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
66 [ C(ITLB) ] = { 66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = { 67 [ C(OP_READ) ] = {
68		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches  */  68		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches  */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ 69 [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
70 }, 70 },
71 [ C(OP_WRITE) ] = { 71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1, 72 [ C(RESULT_ACCESS) ] = -1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d2..c8f5c088cad1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 status; 715 u64 status;
716 int handled = 0; 716 int handled;
717 717
718 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
719 719
720 cpuc = &__get_cpu_var(cpu_hw_events); 720 cpuc = &__get_cpu_var(cpu_hw_events);
721 721
722 intel_pmu_disable_all(); 722 intel_pmu_disable_all();
723 intel_pmu_drain_bts_buffer(); 723 handled = intel_pmu_drain_bts_buffer();
724 status = intel_pmu_get_status(); 724 status = intel_pmu_get_status();
725 if (!status) { 725 if (!status) {
726 intel_pmu_enable_all(0); 726 intel_pmu_enable_all(0);
727 return 0; 727 return handled;
728 } 728 }
729 729
730 loops = 0; 730 loops = 0;
@@ -763,7 +763,7 @@ again:
763 data.period = event->hw.last_period; 763 data.period = event->hw.last_period;
764 764
765 if (perf_event_overflow(event, 1, &data, regs)) 765 if (perf_event_overflow(event, 1, &data, regs))
766 x86_pmu_stop(event); 766 x86_pmu_stop(event, 0);
767 } 767 }
768 768
769 /* 769 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311cd..4977f9c400e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
214 update_debugctlmsr(debugctlmsr); 214 update_debugctlmsr(debugctlmsr);
215} 215}
216 216
217static void intel_pmu_drain_bts_buffer(void) 217static int intel_pmu_drain_bts_buffer(void)
218{ 218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds; 220 struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
231 struct pt_regs regs; 231 struct pt_regs regs;
232 232
233 if (!event) 233 if (!event)
234 return; 234 return 0;
235 235
236 if (!ds) 236 if (!ds)
237 return; 237 return 0;
238 238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index; 240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241 241
242 if (top <= at) 242 if (top <= at)
243 return; 243 return 0;
244 244
245 ds->bts_index = ds->bts_buffer_base; 245 ds->bts_index = ds->bts_buffer_base;
246 246
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
256 perf_prepare_sample(&header, &data, event, &regs); 256 perf_prepare_sample(&header, &data, event, &regs);
257 257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return; 259 return 1;
260 260
261 for (; at < top; at++) { 261 for (; at < top; at++) {
262 data.ip = at->from; 262 data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
270 /* There's new data available. */ 270 /* There's new data available. */
271 event->hw.interrupts++; 271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN; 272 event->pending_kill = POLL_IN;
273 return 1;
273} 274}
274 275
275/* 276/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
491 regs.flags &= ~PERF_EFLAGS_EXACT; 492 regs.flags &= ~PERF_EFLAGS_EXACT;
492 493
493 if (perf_event_overflow(event, 1, &data, &regs)) 494 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event); 495 x86_pmu_stop(event, 0);
495} 496}
496 497
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) 498static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
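
intel_pmu_drain_bts_buffer() now reports whether it consumed any records, and intel_pmu_handle_irq() seeds its handled count with that result, so an NMI that only drained the BTS buffer is still acknowledged rather than reported as unknown. A small sketch of that control flow with stubbed helpers; the helper names are assumptions, only the flow mirrors the hunks:

        #include <stdio.h>

        /* pretend the BTS buffer had records to flush */
        static int drain_bts_buffer(void)
        {
                return 1;
        }

        /* pretend no generic counter overflowed */
        static unsigned long long get_status(void)
        {
                return 0;
        }

        static int handle_irq(void)
        {
                int handled;

                handled = drain_bts_buffer();   /* seeds the handled count */
                if (!get_status())
                        return handled;         /* BTS-only NMI still counts as handled */

                /* ... overflow processing would increment handled here ... */
                return handled;
        }

        int main(void)
        {
                printf("NMI handled: %d\n", handle_irq());
                return 0;
        }
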
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 249015173992..81400b93e694 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
18struct p4_event_bind { 18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */ 19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned int escr_emask; /* valid ESCR EventMask bits */
22 unsigned int shared; /* event is shared across threads */
21	char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */  23	char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */
22}; 24};
23 25
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
66 [P4_EVENT_TC_DELIVER_MODE] = { 68 [P4_EVENT_TC_DELIVER_MODE] = {
67 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), 69 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
68 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 70 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71 .escr_emask =
72 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79 .shared = 1,
69 .cntr = { {4, 5, -1}, {6, 7, -1} }, 80 .cntr = { {4, 5, -1}, {6, 7, -1} },
70 }, 81 },
71 [P4_EVENT_BPU_FETCH_REQUEST] = { 82 [P4_EVENT_BPU_FETCH_REQUEST] = {
72 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), 83 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
73 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, 84 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85 .escr_emask =
86 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
74 .cntr = { {0, -1, -1}, {2, -1, -1} }, 87 .cntr = { {0, -1, -1}, {2, -1, -1} },
75 }, 88 },
76 [P4_EVENT_ITLB_REFERENCE] = { 89 [P4_EVENT_ITLB_REFERENCE] = {
77 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), 90 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
78 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, 91 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92 .escr_emask =
93 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
79 .cntr = { {0, -1, -1}, {2, -1, -1} }, 96 .cntr = { {0, -1, -1}, {2, -1, -1} },
80 }, 97 },
81 [P4_EVENT_MEMORY_CANCEL] = { 98 [P4_EVENT_MEMORY_CANCEL] = {
82 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), 99 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
83 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 100 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101 .escr_emask =
102 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
84 .cntr = { {8, 9, -1}, {10, 11, -1} }, 104 .cntr = { {8, 9, -1}, {10, 11, -1} },
85 }, 105 },
86 [P4_EVENT_MEMORY_COMPLETE] = { 106 [P4_EVENT_MEMORY_COMPLETE] = {
87 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), 107 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
88 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 108 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109 .escr_emask =
110 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
89 .cntr = { {8, 9, -1}, {10, 11, -1} }, 112 .cntr = { {8, 9, -1}, {10, 11, -1} },
90 }, 113 },
91 [P4_EVENT_LOAD_PORT_REPLAY] = { 114 [P4_EVENT_LOAD_PORT_REPLAY] = {
92 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), 115 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
93 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, 116 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117 .escr_emask =
118 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
94 .cntr = { {8, 9, -1}, {10, 11, -1} }, 119 .cntr = { {8, 9, -1}, {10, 11, -1} },
95 }, 120 },
96 [P4_EVENT_STORE_PORT_REPLAY] = { 121 [P4_EVENT_STORE_PORT_REPLAY] = {
97 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), 122 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
98 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 123 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124 .escr_emask =
125 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
99 .cntr = { {8, 9, -1}, {10, 11, -1} }, 126 .cntr = { {8, 9, -1}, {10, 11, -1} },
100 }, 127 },
101 [P4_EVENT_MOB_LOAD_REPLAY] = { 128 [P4_EVENT_MOB_LOAD_REPLAY] = {
102 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), 129 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
103 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, 130 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131 .escr_emask =
132 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
104 .cntr = { {0, -1, -1}, {2, -1, -1} }, 136 .cntr = { {0, -1, -1}, {2, -1, -1} },
105 }, 137 },
106 [P4_EVENT_PAGE_WALK_TYPE] = { 138 [P4_EVENT_PAGE_WALK_TYPE] = {
107 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), 139 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
108 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, 140 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141 .escr_emask =
142 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144 .shared = 1,
109 .cntr = { {0, -1, -1}, {2, -1, -1} }, 145 .cntr = { {0, -1, -1}, {2, -1, -1} },
110 }, 146 },
111 [P4_EVENT_BSQ_CACHE_REFERENCE] = { 147 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
112 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), 148 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
113 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 149 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150 .escr_emask =
151 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
114 .cntr = { {0, -1, -1}, {2, -1, -1} }, 160 .cntr = { {0, -1, -1}, {2, -1, -1} },
115 }, 161 },
116 [P4_EVENT_IOQ_ALLOCATION] = { 162 [P4_EVENT_IOQ_ALLOCATION] = {
117 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), 163 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
118 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 164 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165 .escr_emask =
166 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
119 .cntr = { {0, -1, -1}, {2, -1, -1} }, 177 .cntr = { {0, -1, -1}, {2, -1, -1} },
120 }, 178 },
121 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 179 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
122 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), 180 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
123 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, 181 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182 .escr_emask =
183 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
124 .cntr = { {2, -1, -1}, {3, -1, -1} }, 194 .cntr = { {2, -1, -1}, {3, -1, -1} },
125 }, 195 },
126 [P4_EVENT_FSB_DATA_ACTIVITY] = { 196 [P4_EVENT_FSB_DATA_ACTIVITY] = {
127 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), 197 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
128 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 198 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199 .escr_emask =
200 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206 .shared = 1,
129 .cntr = { {0, -1, -1}, {2, -1, -1} }, 207 .cntr = { {0, -1, -1}, {2, -1, -1} },
130 }, 208 },
131 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ 209 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
132 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), 210 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
133 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, 211 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212 .escr_emask =
213 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
134 .cntr = { {0, -1, -1}, {1, -1, -1} }, 226 .cntr = { {0, -1, -1}, {1, -1, -1} },
135 }, 227 },
136 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 228 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
137 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), 229 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
138 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, 230 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231 .escr_emask =
232 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
139 .cntr = { {2, -1, -1}, {3, -1, -1} }, 245 .cntr = { {2, -1, -1}, {3, -1, -1} },
140 }, 246 },
141 [P4_EVENT_SSE_INPUT_ASSIST] = { 247 [P4_EVENT_SSE_INPUT_ASSIST] = {
142 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), 248 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
143 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 249 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250 .escr_emask =
251 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252 .shared = 1,
144 .cntr = { {8, 9, -1}, {10, 11, -1} }, 253 .cntr = { {8, 9, -1}, {10, 11, -1} },
145 }, 254 },
146 [P4_EVENT_PACKED_SP_UOP] = { 255 [P4_EVENT_PACKED_SP_UOP] = {
147 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), 256 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
148 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 257 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258 .escr_emask =
259 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260 .shared = 1,
149 .cntr = { {8, 9, -1}, {10, 11, -1} }, 261 .cntr = { {8, 9, -1}, {10, 11, -1} },
150 }, 262 },
151 [P4_EVENT_PACKED_DP_UOP] = { 263 [P4_EVENT_PACKED_DP_UOP] = {
152 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), 264 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
153 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 265 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266 .escr_emask =
267 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268 .shared = 1,
154 .cntr = { {8, 9, -1}, {10, 11, -1} }, 269 .cntr = { {8, 9, -1}, {10, 11, -1} },
155 }, 270 },
156 [P4_EVENT_SCALAR_SP_UOP] = { 271 [P4_EVENT_SCALAR_SP_UOP] = {
157 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), 272 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
158 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 273 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274 .escr_emask =
275 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276 .shared = 1,
159 .cntr = { {8, 9, -1}, {10, 11, -1} }, 277 .cntr = { {8, 9, -1}, {10, 11, -1} },
160 }, 278 },
161 [P4_EVENT_SCALAR_DP_UOP] = { 279 [P4_EVENT_SCALAR_DP_UOP] = {
162 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), 280 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
163 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 281 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282 .escr_emask =
283 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284 .shared = 1,
164 .cntr = { {8, 9, -1}, {10, 11, -1} }, 285 .cntr = { {8, 9, -1}, {10, 11, -1} },
165 }, 286 },
166 [P4_EVENT_64BIT_MMX_UOP] = { 287 [P4_EVENT_64BIT_MMX_UOP] = {
167 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), 288 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
168 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 289 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290 .escr_emask =
291 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292 .shared = 1,
169 .cntr = { {8, 9, -1}, {10, 11, -1} }, 293 .cntr = { {8, 9, -1}, {10, 11, -1} },
170 }, 294 },
171 [P4_EVENT_128BIT_MMX_UOP] = { 295 [P4_EVENT_128BIT_MMX_UOP] = {
172 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), 296 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
173 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 297 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298 .escr_emask =
299 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300 .shared = 1,
174 .cntr = { {8, 9, -1}, {10, 11, -1} }, 301 .cntr = { {8, 9, -1}, {10, 11, -1} },
175 }, 302 },
176 [P4_EVENT_X87_FP_UOP] = { 303 [P4_EVENT_X87_FP_UOP] = {
177 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), 304 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
178 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 305 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306 .escr_emask =
307 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308 .shared = 1,
179 .cntr = { {8, 9, -1}, {10, 11, -1} }, 309 .cntr = { {8, 9, -1}, {10, 11, -1} },
180 }, 310 },
181 [P4_EVENT_TC_MISC] = { 311 [P4_EVENT_TC_MISC] = {
182 .opcode = P4_OPCODE(P4_EVENT_TC_MISC), 312 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
183 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 313 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314 .escr_emask =
315 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
184 .cntr = { {4, 5, -1}, {6, 7, -1} }, 316 .cntr = { {4, 5, -1}, {6, 7, -1} },
185 }, 317 },
186 [P4_EVENT_GLOBAL_POWER_EVENTS] = { 318 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
187 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), 319 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
188 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 320 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321 .escr_emask =
322 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
189 .cntr = { {0, -1, -1}, {2, -1, -1} }, 323 .cntr = { {0, -1, -1}, {2, -1, -1} },
190 }, 324 },
191 [P4_EVENT_TC_MS_XFER] = { 325 [P4_EVENT_TC_MS_XFER] = {
192 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), 326 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
193 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 327 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328 .escr_emask =
329 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
194 .cntr = { {4, 5, -1}, {6, 7, -1} }, 330 .cntr = { {4, 5, -1}, {6, 7, -1} },
195 }, 331 },
196 [P4_EVENT_UOP_QUEUE_WRITES] = { 332 [P4_EVENT_UOP_QUEUE_WRITES] = {
197 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), 333 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
198 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 334 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335 .escr_emask =
336 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
199 .cntr = { {4, 5, -1}, {6, 7, -1} }, 339 .cntr = { {4, 5, -1}, {6, 7, -1} },
200 }, 340 },
201 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { 341 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
202 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), 342 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
203 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, 343 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344 .escr_emask =
345 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
204 .cntr = { {4, 5, -1}, {6, 7, -1} }, 349 .cntr = { {4, 5, -1}, {6, 7, -1} },
205 }, 350 },
206 [P4_EVENT_RETIRED_BRANCH_TYPE] = { 351 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
207 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), 352 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
208 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, 353 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354 .escr_emask =
355 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
209 .cntr = { {4, 5, -1}, {6, 7, -1} }, 359 .cntr = { {4, 5, -1}, {6, 7, -1} },
210 }, 360 },
211 [P4_EVENT_RESOURCE_STALL] = { 361 [P4_EVENT_RESOURCE_STALL] = {
212 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), 362 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
213 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, 363 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364 .escr_emask =
365 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
214 .cntr = { {12, 13, 16}, {14, 15, 17} }, 366 .cntr = { {12, 13, 16}, {14, 15, 17} },
215 }, 367 },
216 [P4_EVENT_WC_BUFFER] = { 368 [P4_EVENT_WC_BUFFER] = {
217 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), 369 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
218 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 370 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371 .escr_emask =
372 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374 .shared = 1,
219 .cntr = { {8, 9, -1}, {10, 11, -1} }, 375 .cntr = { {8, 9, -1}, {10, 11, -1} },
220 }, 376 },
221 [P4_EVENT_B2B_CYCLES] = { 377 [P4_EVENT_B2B_CYCLES] = {
222 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), 378 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
223 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 379 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380 .escr_emask = 0,
224 .cntr = { {0, -1, -1}, {2, -1, -1} }, 381 .cntr = { {0, -1, -1}, {2, -1, -1} },
225 }, 382 },
226 [P4_EVENT_BNR] = { 383 [P4_EVENT_BNR] = {
227 .opcode = P4_OPCODE(P4_EVENT_BNR), 384 .opcode = P4_OPCODE(P4_EVENT_BNR),
228 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 385 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386 .escr_emask = 0,
229 .cntr = { {0, -1, -1}, {2, -1, -1} }, 387 .cntr = { {0, -1, -1}, {2, -1, -1} },
230 }, 388 },
231 [P4_EVENT_SNOOP] = { 389 [P4_EVENT_SNOOP] = {
232 .opcode = P4_OPCODE(P4_EVENT_SNOOP), 390 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
233 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 391 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392 .escr_emask = 0,
234 .cntr = { {0, -1, -1}, {2, -1, -1} }, 393 .cntr = { {0, -1, -1}, {2, -1, -1} },
235 }, 394 },
236 [P4_EVENT_RESPONSE] = { 395 [P4_EVENT_RESPONSE] = {
237 .opcode = P4_OPCODE(P4_EVENT_RESPONSE), 396 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
238 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 397 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398 .escr_emask = 0,
239 .cntr = { {0, -1, -1}, {2, -1, -1} }, 399 .cntr = { {0, -1, -1}, {2, -1, -1} },
240 }, 400 },
241 [P4_EVENT_FRONT_END_EVENT] = { 401 [P4_EVENT_FRONT_END_EVENT] = {
242 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), 402 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
243 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 403 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404 .escr_emask =
405 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
244 .cntr = { {12, 13, 16}, {14, 15, 17} }, 407 .cntr = { {12, 13, 16}, {14, 15, 17} },
245 }, 408 },
246 [P4_EVENT_EXECUTION_EVENT] = { 409 [P4_EVENT_EXECUTION_EVENT] = {
247 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), 410 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
248 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 411 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412 .escr_emask =
413 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
249 .cntr = { {12, 13, 16}, {14, 15, 17} }, 421 .cntr = { {12, 13, 16}, {14, 15, 17} },
250 }, 422 },
251 [P4_EVENT_REPLAY_EVENT] = { 423 [P4_EVENT_REPLAY_EVENT] = {
252 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), 424 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
253 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 425 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426 .escr_emask =
427 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
254 .cntr = { {12, 13, 16}, {14, 15, 17} }, 429 .cntr = { {12, 13, 16}, {14, 15, 17} },
255 }, 430 },
256 [P4_EVENT_INSTR_RETIRED] = { 431 [P4_EVENT_INSTR_RETIRED] = {
257 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), 432 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
258 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 433 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434 .escr_emask =
435 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
259 .cntr = { {12, 13, 16}, {14, 15, 17} }, 439 .cntr = { {12, 13, 16}, {14, 15, 17} },
260 }, 440 },
261 [P4_EVENT_UOPS_RETIRED] = { 441 [P4_EVENT_UOPS_RETIRED] = {
262 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), 442 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
263 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 443 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444 .escr_emask =
445 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
264 .cntr = { {12, 13, 16}, {14, 15, 17} }, 447 .cntr = { {12, 13, 16}, {14, 15, 17} },
265 }, 448 },
266 [P4_EVENT_UOP_TYPE] = { 449 [P4_EVENT_UOP_TYPE] = {
267 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), 450 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
268 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, 451 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452 .escr_emask =
453 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
269 .cntr = { {12, 13, 16}, {14, 15, 17} }, 455 .cntr = { {12, 13, 16}, {14, 15, 17} },
270 }, 456 },
271 [P4_EVENT_BRANCH_RETIRED] = { 457 [P4_EVENT_BRANCH_RETIRED] = {
272 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), 458 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
273 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 459 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460 .escr_emask =
461 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
274 .cntr = { {12, 13, 16}, {14, 15, 17} }, 465 .cntr = { {12, 13, 16}, {14, 15, 17} },
275 }, 466 },
276 [P4_EVENT_MISPRED_BRANCH_RETIRED] = { 467 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
277 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
278 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
279 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
280 }, 473 },
281 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
282 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), 475 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
283 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 476 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477 .escr_emask =
478 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
284 .cntr = { {12, 13, 16}, {14, 15, 17} }, 483 .cntr = { {12, 13, 16}, {14, 15, 17} },
285 }, 484 },
286 [P4_EVENT_MACHINE_CLEAR] = { 485 [P4_EVENT_MACHINE_CLEAR] = {
287 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), 486 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
288 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 487 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488 .escr_emask =
489 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
289 .cntr = { {12, 13, 16}, {14, 15, 17} }, 492 .cntr = { {12, 13, 16}, {14, 15, 17} },
290 }, 493 },
291 [P4_EVENT_INSTR_COMPLETED] = { 494 [P4_EVENT_INSTR_COMPLETED] = {
292 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), 495 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
293 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 496 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497 .escr_emask =
498 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
294 .cntr = { {12, 13, 16}, {14, 15, 17} }, 500 .cntr = { {12, 13, 16}, {14, 15, 17} },
295 }, 501 },
296}; 502};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
428 return config; 634 return config;
429} 635}
430 636
637/* check cpu model specifics */
638static bool p4_event_match_cpu_model(unsigned int event_idx)
639{
 640	/* INSTR_COMPLETED event only exists for models 3, 4 and 6 (Prescott) */
641 if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642 if (boot_cpu_data.x86_model != 3 &&
643 boot_cpu_data.x86_model != 4 &&
644 boot_cpu_data.x86_model != 6)
645 return false;
646 }
647
648 /*
 649	 * For reference:
 650	 * - IQ_ESCR0 and IQ_ESCR1 exist only on models 1 and 2
651 */
652
653 return true;
654}
655
431static int p4_validate_raw_event(struct perf_event *event) 656static int p4_validate_raw_event(struct perf_event *event)
432{ 657{
433 unsigned int v; 658 unsigned int v, emask;
434 659
435 /* user data may have out-of-bound event index */ 660 /* User data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config); 661 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) { 662 if (v >= ARRAY_SIZE(p4_event_bind_map))
438 pr_warning("P4 PMU: Unknown event code: %d\n", v); 663 return -EINVAL;
664
665 /* It may be unsupported: */
666 if (!p4_event_match_cpu_model(v))
439 return -EINVAL; 667 return -EINVAL;
668
669 /*
 670	 * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
 671	 * in Architectural Performance Monitoring: it selects not
 672	 * _which_ logical cpu to count on but rather _when_, i.e. it
 673	 * depends on logical cpu state -- count the event when one cpu is
 674	 * active, when none is, when both are or when either is, so we
 675	 * simply allow the user to pass any desired value.
 676	 *
 677	 * In turn we always set the Tx_OS/Tx_USR bits bound to the local
 678	 * logical cpu, without propagating them to the other cpu.
679 */
680
681 /*
 682	 * If an event is shared across the logical threads,
 683	 * the user needs special permissions to be able to use it.
684 */
685 if (p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES;
440 } 688 }
441 689
690 /* ESCR EventMask bits may be invalid */
691 emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692 if (emask & ~p4_event_bind_map[v].escr_emask)
693 return -EINVAL;
694
442 /* 695 /*
443 * it may have some screwed PEBS bits 696 * it may have some invalid PEBS bits
444 */ 697 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { 698 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL; 699 return -EINVAL;
448 } 700
449 v = p4_config_unpack_metric(event->attr.config); 701 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { 702 if (v >= ARRAY_SIZE(p4_pebs_bind_map))
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL; 703 return -EINVAL;
453 }
454 704
455 return 0; 705 return 0;
456} 706}
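
For illustration of how these checks surface to user space, here is a minimal sketch (not part of this patch) that opens a raw P4 event via perf_event_open(2). RAW_P4_CONFIG is only a placeholder, since packing a real ESCR/CCCR config requires the kernel-internal helpers; the point is the error mapping shown above: an unknown event index, an unsupported cpu model or stray ESCR event-mask bits come back as EINVAL, and a .shared event without CAP_SYS_ADMIN (under paranoid settings) as EACCES. Note also that p4_hw_config() below first masks attr.config with P4_CONFIG_MASK, so kernel-reserved bits are silently dropped before validation.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>

#define RAW_P4_CONFIG	0x0ULL	/* placeholder, not a properly packed config */

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_RAW;
	attr.config = RAW_P4_CONFIG;

	/* pid 0 (self), any cpu, no group, no flags */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
	else
		close(fd);
	return 0;
}
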
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
478 728
479 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
480 730
731 /*
 732	 * Clear the bits we reserve to be managed by the kernel itself
 733	 * and which are never allowed from user space.
734 */
735 event->attr.config &= P4_CONFIG_MASK;
736
481 rc = p4_validate_raw_event(event); 737 rc = p4_validate_raw_event(event);
482 if (rc) 738 if (rc)
483 goto out; 739 goto out;
484 740
485 /* 741 /*
486 * We don't control raw events so it's up to the caller
487 * to pass sane values (and we don't count the thread number
488 * on HT machine but allow HT-compatible specifics to be
489 * passed on)
490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED 742 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
 492	 * bits since we keep additional info here (for cache events etc.)	 743	 * bits since we keep additional info here (for cache events etc.)
493 *
494 * XXX: HT wide things should check perf_paranoid_cpu() &&
495 * CAP_SYS_ADMIN
496 */ 744 */
497 event->hw.config |= event->attr.config & 745 event->hw.config |= event->attr.config;
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
502 } 746 }
503 747
504 rc = x86_setup_perfctr(event); 748 rc = x86_setup_perfctr(event);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index fb329e9f8494..d9f4ff8fcd69 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -700,11 +700,10 @@ static void probe_nmi_watchdog(void)
700{ 700{
701 switch (boot_cpu_data.x86_vendor) { 701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD: 702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && 703 if (boot_cpu_data.x86 == 6 ||
704 boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) 704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 return; 705 wd_ops = &k7_wd_ops;
706 wd_ops = &k7_wd_ops; 706 return;
707 break;
708 case X86_VENDOR_INTEL: 707 case X86_VENDOR_INTEL:
709 /* Work around where perfctr1 doesn't have a working enable 708 /* Work around where perfctr1 doesn't have a working enable
710 * bit as described in the following errata: 709 * bit as described in the following errata:
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d49079515122..c7f64e6f537a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -44,6 +44,12 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, 44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, 45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
46 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, 46 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 },
47 { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 },
48 { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 },
49 { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 },
50 { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 },
51 { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 },
52 { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 },
47 { 0, 0, 0, 0, 0 } 53 { 0, 0, 0, 0, 0 }
48 }; 54 };
49 55
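
The rows added above scatter SVM capability bits out of CPUID leaf 0x8000000a (register EDX) into the kernel's feature flags. For reference, a small user-space sketch that reads the same bits directly; the bit positions are taken from the table above, and the printed names are only descriptive labels:

#include <cpuid.h>
#include <stdio.h>

#define SVM_EDX_BIT(edx, bit)	(((edx) >> (bit)) & 1U)

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000000a, &eax, &ebx, &ecx, &edx))
		return 1;	/* SVM leaf not reported by this CPU */

	printf("tsc_rate_msr:   %u\n", SVM_EDX_BIT(edx, 4));
	printf("vmcb_clean:     %u\n", SVM_EDX_BIT(edx, 5));
	printf("flush_by_asid:  %u\n", SVM_EDX_BIT(edx, 6));
	printf("decode_assists: %u\n", SVM_EDX_BIT(edx, 7));
	printf("pause_filter:   %u\n", SVM_EDX_BIT(edx, 10));
	printf("pf_threshold:   %u\n", SVM_EDX_BIT(edx, 12));
	return 0;
}
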
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 045b36cada65..994828899e09 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
34 if (!csize) 34 if (!csize)
35 return 0; 35 return 0;
36 36
37 vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); 37 vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
38 if (!vaddr) 38 if (!vaddr)
39 return -ENOMEM; 39 return -ENOMEM;
40 40
@@ -46,6 +46,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
46 } else 46 } else
47 memcpy(buf, vaddr + offset, csize); 47 memcpy(buf, vaddr + offset, csize);
48 48
49 set_iounmap_nonlazy();
49 iounmap(vaddr); 50 iounmap(vaddr);
50 return csize; 51 return csize;
51} 52}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index ebdb85cf2686..76b8cd953dee 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -97,7 +97,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
97} 97}
98 98
99#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) 99#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
100#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
101static u32 __init ati_ixp4x0_rev(int num, int slot, int func) 100static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
102{ 101{
103 u32 d; 102 u32 d;
@@ -115,7 +114,6 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
115 d &= 0xff; 114 d &= 0xff;
116 return d; 115 return d;
117} 116}
118#endif
119 117
120static void __init ati_bugs(int num, int slot, int func) 118static void __init ati_bugs(int num, int slot, int func)
121{ 119{
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index fa99bae75ace..4572f25f9325 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -14,6 +14,7 @@
14#include <xen/hvc-console.h> 14#include <xen/hvc-console.h>
15#include <asm/pci-direct.h> 15#include <asm/pci-direct.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/mrst.h>
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
18#include <linux/usb/ehci_def.h> 19#include <linux/usb/ehci_def.h>
19 20
@@ -239,6 +240,18 @@ static int __init setup_early_printk(char *buf)
239 if (!strncmp(buf, "xen", 3)) 240 if (!strncmp(buf, "xen", 3))
240 early_console_register(&xenboot_console, keep); 241 early_console_register(&xenboot_console, keep);
241#endif 242#endif
243#ifdef CONFIG_X86_MRST_EARLY_PRINTK
244 if (!strncmp(buf, "mrst", 4)) {
245 mrst_early_console_init();
246 early_console_register(&early_mrst_console, keep);
247 }
248
249 if (!strncmp(buf, "hsu", 3)) {
250 hsu_early_console_init();
251 early_console_register(&early_hsu_console, keep);
252 }
253
254#endif
242 buf++; 255 buf++;
243 } 256 }
244 return 0; 257 return 0;
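
With the branch above in place, the two new consoles are selected from the kernel command line like the existing earlyprintk targets, for example (assuming the usual earlyprintk= syntax; the optional ,keep suffix is parsed elsewhere in setup_early_printk()):

	earlyprintk=mrst,keep
	earlyprintk=hsu

Both depend on CONFIG_X86_MRST_EARLY_PRINTK, as guarded above.
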
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c
new file mode 100644
index 000000000000..65df603622b2
--- /dev/null
+++ b/arch/x86/kernel/early_printk_mrst.c
@@ -0,0 +1,319 @@
1/*
2 * early_printk_mrst.c - early consoles for Intel MID platforms
3 *
4 * Copyright (c) 2008-2010, Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11
12/*
13 * This file implements two early consoles named mrst and hsu.
 14 * mrst is based on the Maxim3110 spi-uart device and exists on both
 15 * the Moorestown and Medfield platforms, while hsu is based on a High
 16 * Speed UART device which only exists on the Medfield platform.
17 */
18
19#include <linux/serial_reg.h>
20#include <linux/serial_mfd.h>
21#include <linux/kmsg_dump.h>
22#include <linux/console.h>
23#include <linux/kernel.h>
24#include <linux/delay.h>
25#include <linux/init.h>
26#include <linux/io.h>
27
28#include <asm/fixmap.h>
29#include <asm/pgtable.h>
30#include <asm/mrst.h>
31
32#define MRST_SPI_TIMEOUT 0x200000
33#define MRST_REGBASE_SPI0 0xff128000
34#define MRST_REGBASE_SPI1 0xff128400
35#define MRST_CLK_SPI0_REG 0xff11d86c
36
37/* Bit fields in CTRLR0 */
38#define SPI_DFS_OFFSET 0
39
40#define SPI_FRF_OFFSET 4
41#define SPI_FRF_SPI 0x0
42#define SPI_FRF_SSP 0x1
43#define SPI_FRF_MICROWIRE 0x2
44#define SPI_FRF_RESV 0x3
45
46#define SPI_MODE_OFFSET 6
47#define SPI_SCPH_OFFSET 6
48#define SPI_SCOL_OFFSET 7
49#define SPI_TMOD_OFFSET 8
50#define SPI_TMOD_TR 0x0 /* xmit & recv */
51#define SPI_TMOD_TO 0x1 /* xmit only */
52#define SPI_TMOD_RO 0x2 /* recv only */
53#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */
54
55#define SPI_SLVOE_OFFSET 10
56#define SPI_SRL_OFFSET 11
57#define SPI_CFS_OFFSET 12
58
59/* Bit fields in SR, 7 bits */
60#define SR_MASK 0x7f /* cover 7 bits */
61#define SR_BUSY (1 << 0)
62#define SR_TF_NOT_FULL (1 << 1)
63#define SR_TF_EMPT (1 << 2)
64#define SR_RF_NOT_EMPT (1 << 3)
65#define SR_RF_FULL (1 << 4)
66#define SR_TX_ERR (1 << 5)
67#define SR_DCOL (1 << 6)
68
69struct dw_spi_reg {
70 u32 ctrl0;
71 u32 ctrl1;
72 u32 ssienr;
73 u32 mwcr;
74 u32 ser;
75 u32 baudr;
76 u32 txfltr;
77 u32 rxfltr;
78 u32 txflr;
79 u32 rxflr;
80 u32 sr;
81 u32 imr;
82 u32 isr;
83 u32 risr;
84 u32 txoicr;
85 u32 rxoicr;
86 u32 rxuicr;
87 u32 msticr;
88 u32 icr;
89 u32 dmacr;
90 u32 dmatdlr;
91 u32 dmardlr;
92 u32 idr;
93 u32 version;
94
95 /* Currently operates as 32 bits, though only the low 16 bits matter */
96 u32 dr;
97} __packed;
98
99#define dw_readl(dw, name) __raw_readl(&(dw)->name)
100#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name)
101
 102/* Use the SPI0 registers for mrst by default; if Penwell is detected, use SPI1 */
103static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
104
105static u32 *pclk_spi0;
 106/* Always contains an accessible address; starts out as 0 */
107static struct dw_spi_reg *pspi;
108
109static struct kmsg_dumper dw_dumper;
110static int dumper_registered;
111
112static void dw_kmsg_dump(struct kmsg_dumper *dumper,
113 enum kmsg_dump_reason reason,
114 const char *s1, unsigned long l1,
115 const char *s2, unsigned long l2)
116{
117 int i;
118
 119	/* If we get here, re-initialize the HW first */
120 mrst_early_console_init();
121
122 for (i = 0; i < l1; i++)
123 early_mrst_console.write(&early_mrst_console, s1 + i, 1);
124 for (i = 0; i < l2; i++)
125 early_mrst_console.write(&early_mrst_console, s2 + i, 1);
126}
127
 128/* Set the baud rate to 115200, 8n1, IRQ disabled */
129static void max3110_write_config(void)
130{
131 u16 config;
132
133 config = 0xc001;
134 dw_writel(pspi, dr, config);
135}
136
 137/* Translate a char into an eligible word and send it to the max3110 */
138static void max3110_write_data(char c)
139{
140 u16 data;
141
142 data = 0x8000 | c;
143 dw_writel(pspi, dr, data);
144}
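
The two magic values above are the MAX3110's 16-bit SPI command words. A hedged restatement as named helpers (hypothetical macros; the framing is inferred from the constants used here, not quoted from a datasheet):

/* Assumed MAX3110 framing: the top bits select the command, the low bits
 * carry the payload. 0xc001 is then a write-configuration word (115200,
 * 8n1, interrupts masked) and 0x8000 | c a write-data word for char c. */
#define MAX3110_WRITE_CONF(cfg)	(0xc000 | ((cfg) & 0x0fff))
#define MAX3110_WRITE_DATA(c)	(0x8000 | ((unsigned char)(c)))
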
145
146void mrst_early_console_init(void)
147{
148 u32 ctrlr0 = 0;
149 u32 spi0_cdiv;
 150	u32 freq; /* Frequency info only needs to be computed once */
151
152 /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
153 pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
154 MRST_CLK_SPI0_REG);
155 spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
156 freq = 100000000 / (spi0_cdiv + 1);
157
158 if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
159 mrst_spi_paddr = MRST_REGBASE_SPI1;
160
161 pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
162 mrst_spi_paddr);
163
164 /* Disable SPI controller */
165 dw_writel(pspi, ssienr, 0);
166
167 /* Set control param, 8 bits, transmit only mode */
168 ctrlr0 = dw_readl(pspi, ctrl0);
169
170 ctrlr0 &= 0xfcc0;
171 ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
172 | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
173 dw_writel(pspi, ctrl0, ctrlr0);
174
175 /*
 176	 * Change the spi0 clk to comply with 115200 bps; use 100000 to
 177	 * calculate the clk divisor so that the clock runs a little slower
 178	 * than the real baud rate.
179 */
180 dw_writel(pspi, baudr, freq/100000);
181
182 /* Disable all INT for early phase */
183 dw_writel(pspi, imr, 0x0);
184
185 /* Set the cs to spi-uart */
186 dw_writel(pspi, ser, 0x2);
187
188 /* Enable the HW, the last step for HW init */
189 dw_writel(pspi, ssienr, 0x1);
190
191 /* Set the default configuration */
192 max3110_write_config();
193
194 /* Register the kmsg dumper */
195 if (!dumper_registered) {
196 dw_dumper.dump = dw_kmsg_dump;
197 kmsg_dump_register(&dw_dumper);
198 dumper_registered = 1;
199 }
200}
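
A worked example of the clock arithmetic in this function, assuming the divider field read from MRST_CLK_SPI0_REG is 4 and that the controller clocks SCLK at input-clock / BAUDR (any other divider value works out the same way):

	spi0_cdiv = 4
	freq      = 100000000 / (4 + 1)  = 20000000   (20 MHz SPI input clock)
	baudr     = 20000000 / 100000    = 200
	SCLK      = freq / baudr         = 100 kHz    (just under 115200 bps)
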
201
202/* Slave select should be called in the read/write function */
203static void early_mrst_spi_putc(char c)
204{
205 unsigned int timeout;
206 u32 sr;
207
208 timeout = MRST_SPI_TIMEOUT;
209 /* Early putc needs to make sure the TX FIFO is not full */
210 while (--timeout) {
211 sr = dw_readl(pspi, sr);
212 if (!(sr & SR_TF_NOT_FULL))
213 cpu_relax();
214 else
215 break;
216 }
217
218 if (!timeout)
219 pr_warning("MRST earlycon: timed out\n");
220 else
221 max3110_write_data(c);
222}
223
224/* Early SPI only uses polling mode */
225static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
226{
227 int i;
228
229 for (i = 0; i < n && *str; i++) {
230 if (*str == '\n')
231 early_mrst_spi_putc('\r');
232 early_mrst_spi_putc(*str);
233 str++;
234 }
235}
236
237struct console early_mrst_console = {
238 .name = "earlymrst",
239 .write = early_mrst_spi_write,
240 .flags = CON_PRINTBUFFER,
241 .index = -1,
242};
243
244/*
245 * Following is the early console based on Medfield HSU (High
246 * Speed UART) device.
247 */
248#define HSU_PORT2_PADDR 0xffa28180
249
250static void __iomem *phsu;
251
252void hsu_early_console_init(void)
253{
254 u8 lcr;
255
256 phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
257 HSU_PORT2_PADDR);
258
259 /* Disable FIFO */
260 writeb(0x0, phsu + UART_FCR);
261
262 /* Set to default 115200 bps, 8n1 */
263 lcr = readb(phsu + UART_LCR);
264 writeb((0x80 | lcr), phsu + UART_LCR);
265 writeb(0x18, phsu + UART_DLL);
266 writeb(lcr, phsu + UART_LCR);
267 writel(0x3600, phsu + UART_MUL*4);
268
269 writeb(0x8, phsu + UART_MCR);
270 writeb(0x7, phsu + UART_FCR);
271 writeb(0x3, phsu + UART_LCR);
272
273 /* Clear IRQ status */
274 readb(phsu + UART_LSR);
275 readb(phsu + UART_RX);
276 readb(phsu + UART_IIR);
277 readb(phsu + UART_MSR);
278
279 /* Enable FIFO */
280 writeb(0x7, phsu + UART_FCR);
281}
282
283#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
284
285static void early_hsu_putc(char ch)
286{
287 unsigned int timeout = 10000; /* 10ms */
288 u8 status;
289
290 while (--timeout) {
291 status = readb(phsu + UART_LSR);
292 if (status & BOTH_EMPTY)
293 break;
294 udelay(1);
295 }
296
297 /* Only write the char when there was no timeout */
298 if (timeout)
299 writeb(ch, phsu + UART_TX);
300}
301
302static void early_hsu_write(struct console *con, const char *str, unsigned n)
303{
304 int i;
305
306 for (i = 0; i < n && *str; i++) {
307 if (*str == '\n')
308 early_hsu_putc('\r');
309 early_hsu_putc(*str);
310 str++;
311 }
312}
313
314struct console early_hsu_console = {
315 .name = "earlyhsu",
316 .write = early_hsu_write,
317 .flags = CON_PRINTBUFFER,
318 .index = -1,
319};
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 227d00920d2f..9fb188d7bc76 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -115,8 +115,7 @@
115 115
116 /* unfortunately push/pop can't be no-op */ 116 /* unfortunately push/pop can't be no-op */
117.macro PUSH_GS 117.macro PUSH_GS
118 pushl $0 118 pushl_cfi $0
119 CFI_ADJUST_CFA_OFFSET 4
120.endm 119.endm
121.macro POP_GS pop=0 120.macro POP_GS pop=0
122 addl $(4 + \pop), %esp 121 addl $(4 + \pop), %esp
@@ -140,14 +139,12 @@
140#else /* CONFIG_X86_32_LAZY_GS */ 139#else /* CONFIG_X86_32_LAZY_GS */
141 140
142.macro PUSH_GS 141.macro PUSH_GS
143 pushl %gs 142 pushl_cfi %gs
144 CFI_ADJUST_CFA_OFFSET 4
145 /*CFI_REL_OFFSET gs, 0*/ 143 /*CFI_REL_OFFSET gs, 0*/
146.endm 144.endm
147 145
148.macro POP_GS pop=0 146.macro POP_GS pop=0
14998: popl %gs 14798: popl_cfi %gs
150 CFI_ADJUST_CFA_OFFSET -4
151 /*CFI_RESTORE gs*/ 148 /*CFI_RESTORE gs*/
152 .if \pop <> 0 149 .if \pop <> 0
153 add $\pop, %esp 150 add $\pop, %esp
@@ -195,35 +192,25 @@
195.macro SAVE_ALL 192.macro SAVE_ALL
196 cld 193 cld
197 PUSH_GS 194 PUSH_GS
198 pushl %fs 195 pushl_cfi %fs
199 CFI_ADJUST_CFA_OFFSET 4
200 /*CFI_REL_OFFSET fs, 0;*/ 196 /*CFI_REL_OFFSET fs, 0;*/
201 pushl %es 197 pushl_cfi %es
202 CFI_ADJUST_CFA_OFFSET 4
203 /*CFI_REL_OFFSET es, 0;*/ 198 /*CFI_REL_OFFSET es, 0;*/
204 pushl %ds 199 pushl_cfi %ds
205 CFI_ADJUST_CFA_OFFSET 4
206 /*CFI_REL_OFFSET ds, 0;*/ 200 /*CFI_REL_OFFSET ds, 0;*/
207 pushl %eax 201 pushl_cfi %eax
208 CFI_ADJUST_CFA_OFFSET 4
209 CFI_REL_OFFSET eax, 0 202 CFI_REL_OFFSET eax, 0
210 pushl %ebp 203 pushl_cfi %ebp
211 CFI_ADJUST_CFA_OFFSET 4
212 CFI_REL_OFFSET ebp, 0 204 CFI_REL_OFFSET ebp, 0
213 pushl %edi 205 pushl_cfi %edi
214 CFI_ADJUST_CFA_OFFSET 4
215 CFI_REL_OFFSET edi, 0 206 CFI_REL_OFFSET edi, 0
216 pushl %esi 207 pushl_cfi %esi
217 CFI_ADJUST_CFA_OFFSET 4
218 CFI_REL_OFFSET esi, 0 208 CFI_REL_OFFSET esi, 0
219 pushl %edx 209 pushl_cfi %edx
220 CFI_ADJUST_CFA_OFFSET 4
221 CFI_REL_OFFSET edx, 0 210 CFI_REL_OFFSET edx, 0
222 pushl %ecx 211 pushl_cfi %ecx
223 CFI_ADJUST_CFA_OFFSET 4
224 CFI_REL_OFFSET ecx, 0 212 CFI_REL_OFFSET ecx, 0
225 pushl %ebx 213 pushl_cfi %ebx
226 CFI_ADJUST_CFA_OFFSET 4
227 CFI_REL_OFFSET ebx, 0 214 CFI_REL_OFFSET ebx, 0
228 movl $(__USER_DS), %edx 215 movl $(__USER_DS), %edx
229 movl %edx, %ds 216 movl %edx, %ds
@@ -234,39 +221,29 @@
234.endm 221.endm
235 222
236.macro RESTORE_INT_REGS 223.macro RESTORE_INT_REGS
237 popl %ebx 224 popl_cfi %ebx
238 CFI_ADJUST_CFA_OFFSET -4
239 CFI_RESTORE ebx 225 CFI_RESTORE ebx
240 popl %ecx 226 popl_cfi %ecx
241 CFI_ADJUST_CFA_OFFSET -4
242 CFI_RESTORE ecx 227 CFI_RESTORE ecx
243 popl %edx 228 popl_cfi %edx
244 CFI_ADJUST_CFA_OFFSET -4
245 CFI_RESTORE edx 229 CFI_RESTORE edx
246 popl %esi 230 popl_cfi %esi
247 CFI_ADJUST_CFA_OFFSET -4
248 CFI_RESTORE esi 231 CFI_RESTORE esi
249 popl %edi 232 popl_cfi %edi
250 CFI_ADJUST_CFA_OFFSET -4
251 CFI_RESTORE edi 233 CFI_RESTORE edi
252 popl %ebp 234 popl_cfi %ebp
253 CFI_ADJUST_CFA_OFFSET -4
254 CFI_RESTORE ebp 235 CFI_RESTORE ebp
255 popl %eax 236 popl_cfi %eax
256 CFI_ADJUST_CFA_OFFSET -4
257 CFI_RESTORE eax 237 CFI_RESTORE eax
258.endm 238.endm
259 239
260.macro RESTORE_REGS pop=0 240.macro RESTORE_REGS pop=0
261 RESTORE_INT_REGS 241 RESTORE_INT_REGS
2621: popl %ds 2421: popl_cfi %ds
263 CFI_ADJUST_CFA_OFFSET -4
264 /*CFI_RESTORE ds;*/ 243 /*CFI_RESTORE ds;*/
2652: popl %es 2442: popl_cfi %es
266 CFI_ADJUST_CFA_OFFSET -4
267 /*CFI_RESTORE es;*/ 245 /*CFI_RESTORE es;*/
2683: popl %fs 2463: popl_cfi %fs
269 CFI_ADJUST_CFA_OFFSET -4
270 /*CFI_RESTORE fs;*/ 247 /*CFI_RESTORE fs;*/
271 POP_GS \pop 248 POP_GS \pop
272.pushsection .fixup, "ax" 249.pushsection .fixup, "ax"
@@ -320,16 +297,12 @@
320 297
321ENTRY(ret_from_fork) 298ENTRY(ret_from_fork)
322 CFI_STARTPROC 299 CFI_STARTPROC
323 pushl %eax 300 pushl_cfi %eax
324 CFI_ADJUST_CFA_OFFSET 4
325 call schedule_tail 301 call schedule_tail
326 GET_THREAD_INFO(%ebp) 302 GET_THREAD_INFO(%ebp)
327 popl %eax 303 popl_cfi %eax
328 CFI_ADJUST_CFA_OFFSET -4 304 pushl_cfi $0x0202 # Reset kernel eflags
329 pushl $0x0202 # Reset kernel eflags 305 popfl_cfi
330 CFI_ADJUST_CFA_OFFSET 4
331 popfl
332 CFI_ADJUST_CFA_OFFSET -4
333 jmp syscall_exit 306 jmp syscall_exit
334 CFI_ENDPROC 307 CFI_ENDPROC
335END(ret_from_fork) 308END(ret_from_fork)
@@ -409,29 +382,23 @@ sysenter_past_esp:
409 * enough kernel state to call TRACE_IRQS_OFF can be called - but 382 * enough kernel state to call TRACE_IRQS_OFF can be called - but
410 * we immediately enable interrupts at that point anyway. 383 * we immediately enable interrupts at that point anyway.
411 */ 384 */
412 pushl $(__USER_DS) 385 pushl_cfi $(__USER_DS)
413 CFI_ADJUST_CFA_OFFSET 4
414 /*CFI_REL_OFFSET ss, 0*/ 386 /*CFI_REL_OFFSET ss, 0*/
415 pushl %ebp 387 pushl_cfi %ebp
416 CFI_ADJUST_CFA_OFFSET 4
417 CFI_REL_OFFSET esp, 0 388 CFI_REL_OFFSET esp, 0
418 pushfl 389 pushfl_cfi
419 orl $X86_EFLAGS_IF, (%esp) 390 orl $X86_EFLAGS_IF, (%esp)
420 CFI_ADJUST_CFA_OFFSET 4 391 pushl_cfi $(__USER_CS)
421 pushl $(__USER_CS)
422 CFI_ADJUST_CFA_OFFSET 4
423 /*CFI_REL_OFFSET cs, 0*/ 392 /*CFI_REL_OFFSET cs, 0*/
424 /* 393 /*
425 * Push current_thread_info()->sysenter_return to the stack. 394 * Push current_thread_info()->sysenter_return to the stack.
426 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
427 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
428 */ 397 */
429 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) 398 pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
430 CFI_ADJUST_CFA_OFFSET 4
431 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
432 400
433 pushl %eax 401 pushl_cfi %eax
434 CFI_ADJUST_CFA_OFFSET 4
435 SAVE_ALL 402 SAVE_ALL
436 ENABLE_INTERRUPTS(CLBR_NONE) 403 ENABLE_INTERRUPTS(CLBR_NONE)
437 404
@@ -486,8 +453,7 @@ sysenter_audit:
486 movl %eax,%edx /* 2nd arg: syscall number */ 453 movl %eax,%edx /* 2nd arg: syscall number */
487 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ 454 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
488 call audit_syscall_entry 455 call audit_syscall_entry
489 pushl %ebx 456 pushl_cfi %ebx
490 CFI_ADJUST_CFA_OFFSET 4
491 movl PT_EAX(%esp),%eax /* reload syscall number */ 457 movl PT_EAX(%esp),%eax /* reload syscall number */
492 jmp sysenter_do_call 458 jmp sysenter_do_call
493 459
@@ -529,8 +495,7 @@ ENDPROC(ia32_sysenter_target)
529 # system call handler stub 495 # system call handler stub
530ENTRY(system_call) 496ENTRY(system_call)
531 RING0_INT_FRAME # can't unwind into user space anyway 497 RING0_INT_FRAME # can't unwind into user space anyway
532 pushl %eax # save orig_eax 498 pushl_cfi %eax # save orig_eax
533 CFI_ADJUST_CFA_OFFSET 4
534 SAVE_ALL 499 SAVE_ALL
535 GET_THREAD_INFO(%ebp) 500 GET_THREAD_INFO(%ebp)
536 # system call tracing in operation / emulation 501 # system call tracing in operation / emulation
@@ -566,7 +531,6 @@ restore_all_notrace:
566 je ldt_ss # returning to user-space with LDT SS 531 je ldt_ss # returning to user-space with LDT SS
567restore_nocheck: 532restore_nocheck:
568 RESTORE_REGS 4 # skip orig_eax/error_code 533 RESTORE_REGS 4 # skip orig_eax/error_code
569 CFI_ADJUST_CFA_OFFSET -4
570irq_return: 534irq_return:
571 INTERRUPT_RETURN 535 INTERRUPT_RETURN
572.section .fixup,"ax" 536.section .fixup,"ax"
@@ -619,10 +583,8 @@ ldt_ss:
619 shr $16, %edx 583 shr $16, %edx
620 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ 584 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
621 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ 585 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
622 pushl $__ESPFIX_SS 586 pushl_cfi $__ESPFIX_SS
623 CFI_ADJUST_CFA_OFFSET 4 587 pushl_cfi %eax /* new kernel esp */
624 push %eax /* new kernel esp */
625 CFI_ADJUST_CFA_OFFSET 4
626 /* Disable interrupts, but do not irqtrace this section: we 588 /* Disable interrupts, but do not irqtrace this section: we
627 * will soon execute iret and the tracer was already set to 589 * will soon execute iret and the tracer was already set to
628 * the irqstate after the iret */ 590 * the irqstate after the iret */
@@ -666,11 +628,9 @@ work_notifysig: # deal with pending signals and
666 628
667 ALIGN 629 ALIGN
668work_notifysig_v86: 630work_notifysig_v86:
669 pushl %ecx # save ti_flags for do_notify_resume 631 pushl_cfi %ecx # save ti_flags for do_notify_resume
670 CFI_ADJUST_CFA_OFFSET 4
671 call save_v86_state # %eax contains pt_regs pointer 632 call save_v86_state # %eax contains pt_regs pointer
672 popl %ecx 633 popl_cfi %ecx
673 CFI_ADJUST_CFA_OFFSET -4
674 movl %eax, %esp 634 movl %eax, %esp
675#else 635#else
676 movl %esp, %eax 636 movl %esp, %eax
@@ -750,14 +710,18 @@ ptregs_##name: \
750#define PTREGSCALL3(name) \ 710#define PTREGSCALL3(name) \
751 ALIGN; \ 711 ALIGN; \
752ptregs_##name: \ 712ptregs_##name: \
713 CFI_STARTPROC; \
753 leal 4(%esp),%eax; \ 714 leal 4(%esp),%eax; \
754 pushl %eax; \ 715 pushl_cfi %eax; \
755 movl PT_EDX(%eax),%ecx; \ 716 movl PT_EDX(%eax),%ecx; \
756 movl PT_ECX(%eax),%edx; \ 717 movl PT_ECX(%eax),%edx; \
757 movl PT_EBX(%eax),%eax; \ 718 movl PT_EBX(%eax),%eax; \
758 call sys_##name; \ 719 call sys_##name; \
759 addl $4,%esp; \ 720 addl $4,%esp; \
760 ret 721 CFI_ADJUST_CFA_OFFSET -4; \
722 ret; \
723 CFI_ENDPROC; \
724ENDPROC(ptregs_##name)
761 725
762PTREGSCALL1(iopl) 726PTREGSCALL1(iopl)
763PTREGSCALL0(fork) 727PTREGSCALL0(fork)
@@ -772,15 +736,19 @@ PTREGSCALL1(vm86old)
772/* Clone is an oddball. The 4th arg is in %edi */ 736/* Clone is an oddball. The 4th arg is in %edi */
773 ALIGN; 737 ALIGN;
774ptregs_clone: 738ptregs_clone:
739 CFI_STARTPROC
775 leal 4(%esp),%eax 740 leal 4(%esp),%eax
776 pushl %eax 741 pushl_cfi %eax
777 pushl PT_EDI(%eax) 742 pushl_cfi PT_EDI(%eax)
778 movl PT_EDX(%eax),%ecx 743 movl PT_EDX(%eax),%ecx
779 movl PT_ECX(%eax),%edx 744 movl PT_ECX(%eax),%edx
780 movl PT_EBX(%eax),%eax 745 movl PT_EBX(%eax),%eax
781 call sys_clone 746 call sys_clone
782 addl $8,%esp 747 addl $8,%esp
748 CFI_ADJUST_CFA_OFFSET -8
783 ret 749 ret
750 CFI_ENDPROC
751ENDPROC(ptregs_clone)
784 752
785.macro FIXUP_ESPFIX_STACK 753.macro FIXUP_ESPFIX_STACK
786/* 754/*
@@ -795,10 +763,8 @@ ptregs_clone:
795 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ 763 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
796 shl $16, %eax 764 shl $16, %eax
797 addl %esp, %eax /* the adjusted stack pointer */ 765 addl %esp, %eax /* the adjusted stack pointer */
798 pushl $__KERNEL_DS 766 pushl_cfi $__KERNEL_DS
799 CFI_ADJUST_CFA_OFFSET 4 767 pushl_cfi %eax
800 pushl %eax
801 CFI_ADJUST_CFA_OFFSET 4
802 lss (%esp), %esp /* switch to the normal stack segment */ 768 lss (%esp), %esp /* switch to the normal stack segment */
803 CFI_ADJUST_CFA_OFFSET -8 769 CFI_ADJUST_CFA_OFFSET -8
804.endm 770.endm
@@ -835,8 +801,7 @@ vector=FIRST_EXTERNAL_VECTOR
835 .if vector <> FIRST_EXTERNAL_VECTOR 801 .if vector <> FIRST_EXTERNAL_VECTOR
836 CFI_ADJUST_CFA_OFFSET -4 802 CFI_ADJUST_CFA_OFFSET -4
837 .endif 803 .endif
8381: pushl $(~vector+0x80) /* Note: always in signed byte range */ 8041: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
839 CFI_ADJUST_CFA_OFFSET 4
840 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 805 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
841 jmp 2f 806 jmp 2f
842 .endif 807 .endif
@@ -876,8 +841,7 @@ ENDPROC(common_interrupt)
876#define BUILD_INTERRUPT3(name, nr, fn) \ 841#define BUILD_INTERRUPT3(name, nr, fn) \
877ENTRY(name) \ 842ENTRY(name) \
878 RING0_INT_FRAME; \ 843 RING0_INT_FRAME; \
879 pushl $~(nr); \ 844 pushl_cfi $~(nr); \
880 CFI_ADJUST_CFA_OFFSET 4; \
881 SAVE_ALL; \ 845 SAVE_ALL; \
882 TRACE_IRQS_OFF \ 846 TRACE_IRQS_OFF \
883 movl %esp,%eax; \ 847 movl %esp,%eax; \
@@ -893,21 +857,18 @@ ENDPROC(name)
893 857
894ENTRY(coprocessor_error) 858ENTRY(coprocessor_error)
895 RING0_INT_FRAME 859 RING0_INT_FRAME
896 pushl $0 860 pushl_cfi $0
897 CFI_ADJUST_CFA_OFFSET 4 861 pushl_cfi $do_coprocessor_error
898 pushl $do_coprocessor_error
899 CFI_ADJUST_CFA_OFFSET 4
900 jmp error_code 862 jmp error_code
901 CFI_ENDPROC 863 CFI_ENDPROC
902END(coprocessor_error) 864END(coprocessor_error)
903 865
904ENTRY(simd_coprocessor_error) 866ENTRY(simd_coprocessor_error)
905 RING0_INT_FRAME 867 RING0_INT_FRAME
906 pushl $0 868 pushl_cfi $0
907 CFI_ADJUST_CFA_OFFSET 4
908#ifdef CONFIG_X86_INVD_BUG 869#ifdef CONFIG_X86_INVD_BUG
909 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ 870 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
910661: pushl $do_general_protection 871661: pushl_cfi $do_general_protection
911662: 872662:
912.section .altinstructions,"a" 873.section .altinstructions,"a"
913 .balign 4 874 .balign 4
@@ -922,19 +883,16 @@ ENTRY(simd_coprocessor_error)
922664: 883664:
923.previous 884.previous
924#else 885#else
925 pushl $do_simd_coprocessor_error 886 pushl_cfi $do_simd_coprocessor_error
926#endif 887#endif
927 CFI_ADJUST_CFA_OFFSET 4
928 jmp error_code 888 jmp error_code
929 CFI_ENDPROC 889 CFI_ENDPROC
930END(simd_coprocessor_error) 890END(simd_coprocessor_error)
931 891
932ENTRY(device_not_available) 892ENTRY(device_not_available)
933 RING0_INT_FRAME 893 RING0_INT_FRAME
934 pushl $-1 # mark this as an int 894 pushl_cfi $-1 # mark this as an int
935 CFI_ADJUST_CFA_OFFSET 4 895 pushl_cfi $do_device_not_available
936 pushl $do_device_not_available
937 CFI_ADJUST_CFA_OFFSET 4
938 jmp error_code 896 jmp error_code
939 CFI_ENDPROC 897 CFI_ENDPROC
940END(device_not_available) 898END(device_not_available)
@@ -956,82 +914,68 @@ END(native_irq_enable_sysexit)
956 914
957ENTRY(overflow) 915ENTRY(overflow)
958 RING0_INT_FRAME 916 RING0_INT_FRAME
959 pushl $0 917 pushl_cfi $0
960 CFI_ADJUST_CFA_OFFSET 4 918 pushl_cfi $do_overflow
961 pushl $do_overflow
962 CFI_ADJUST_CFA_OFFSET 4
963 jmp error_code 919 jmp error_code
964 CFI_ENDPROC 920 CFI_ENDPROC
965END(overflow) 921END(overflow)
966 922
967ENTRY(bounds) 923ENTRY(bounds)
968 RING0_INT_FRAME 924 RING0_INT_FRAME
969 pushl $0 925 pushl_cfi $0
970 CFI_ADJUST_CFA_OFFSET 4 926 pushl_cfi $do_bounds
971 pushl $do_bounds
972 CFI_ADJUST_CFA_OFFSET 4
973 jmp error_code 927 jmp error_code
974 CFI_ENDPROC 928 CFI_ENDPROC
975END(bounds) 929END(bounds)
976 930
977ENTRY(invalid_op) 931ENTRY(invalid_op)
978 RING0_INT_FRAME 932 RING0_INT_FRAME
979 pushl $0 933 pushl_cfi $0
980 CFI_ADJUST_CFA_OFFSET 4 934 pushl_cfi $do_invalid_op
981 pushl $do_invalid_op
982 CFI_ADJUST_CFA_OFFSET 4
983 jmp error_code 935 jmp error_code
984 CFI_ENDPROC 936 CFI_ENDPROC
985END(invalid_op) 937END(invalid_op)
986 938
987ENTRY(coprocessor_segment_overrun) 939ENTRY(coprocessor_segment_overrun)
988 RING0_INT_FRAME 940 RING0_INT_FRAME
989 pushl $0 941 pushl_cfi $0
990 CFI_ADJUST_CFA_OFFSET 4 942 pushl_cfi $do_coprocessor_segment_overrun
991 pushl $do_coprocessor_segment_overrun
992 CFI_ADJUST_CFA_OFFSET 4
993 jmp error_code 943 jmp error_code
994 CFI_ENDPROC 944 CFI_ENDPROC
995END(coprocessor_segment_overrun) 945END(coprocessor_segment_overrun)
996 946
997ENTRY(invalid_TSS) 947ENTRY(invalid_TSS)
998 RING0_EC_FRAME 948 RING0_EC_FRAME
999 pushl $do_invalid_TSS 949 pushl_cfi $do_invalid_TSS
1000 CFI_ADJUST_CFA_OFFSET 4
1001 jmp error_code 950 jmp error_code
1002 CFI_ENDPROC 951 CFI_ENDPROC
1003END(invalid_TSS) 952END(invalid_TSS)
1004 953
1005ENTRY(segment_not_present) 954ENTRY(segment_not_present)
1006 RING0_EC_FRAME 955 RING0_EC_FRAME
1007 pushl $do_segment_not_present 956 pushl_cfi $do_segment_not_present
1008 CFI_ADJUST_CFA_OFFSET 4
1009 jmp error_code 957 jmp error_code
1010 CFI_ENDPROC 958 CFI_ENDPROC
1011END(segment_not_present) 959END(segment_not_present)
1012 960
1013ENTRY(stack_segment) 961ENTRY(stack_segment)
1014 RING0_EC_FRAME 962 RING0_EC_FRAME
1015 pushl $do_stack_segment 963 pushl_cfi $do_stack_segment
1016 CFI_ADJUST_CFA_OFFSET 4
1017 jmp error_code 964 jmp error_code
1018 CFI_ENDPROC 965 CFI_ENDPROC
1019END(stack_segment) 966END(stack_segment)
1020 967
1021ENTRY(alignment_check) 968ENTRY(alignment_check)
1022 RING0_EC_FRAME 969 RING0_EC_FRAME
1023 pushl $do_alignment_check 970 pushl_cfi $do_alignment_check
1024 CFI_ADJUST_CFA_OFFSET 4
1025 jmp error_code 971 jmp error_code
1026 CFI_ENDPROC 972 CFI_ENDPROC
1027END(alignment_check) 973END(alignment_check)
1028 974
1029ENTRY(divide_error) 975ENTRY(divide_error)
1030 RING0_INT_FRAME 976 RING0_INT_FRAME
1031 pushl $0 # no error code 977 pushl_cfi $0 # no error code
1032 CFI_ADJUST_CFA_OFFSET 4 978 pushl_cfi $do_divide_error
1033 pushl $do_divide_error
1034 CFI_ADJUST_CFA_OFFSET 4
1035 jmp error_code 979 jmp error_code
1036 CFI_ENDPROC 980 CFI_ENDPROC
1037END(divide_error) 981END(divide_error)
@@ -1039,10 +983,8 @@ END(divide_error)
1039#ifdef CONFIG_X86_MCE 983#ifdef CONFIG_X86_MCE
1040ENTRY(machine_check) 984ENTRY(machine_check)
1041 RING0_INT_FRAME 985 RING0_INT_FRAME
1042 pushl $0 986 pushl_cfi $0
1043 CFI_ADJUST_CFA_OFFSET 4 987 pushl_cfi machine_check_vector
1044 pushl machine_check_vector
1045 CFI_ADJUST_CFA_OFFSET 4
1046 jmp error_code 988 jmp error_code
1047 CFI_ENDPROC 989 CFI_ENDPROC
1048END(machine_check) 990END(machine_check)
@@ -1050,10 +992,8 @@ END(machine_check)
1050 992
1051ENTRY(spurious_interrupt_bug) 993ENTRY(spurious_interrupt_bug)
1052 RING0_INT_FRAME 994 RING0_INT_FRAME
1053 pushl $0 995 pushl_cfi $0
1054 CFI_ADJUST_CFA_OFFSET 4 996 pushl_cfi $do_spurious_interrupt_bug
1055 pushl $do_spurious_interrupt_bug
1056 CFI_ADJUST_CFA_OFFSET 4
1057 jmp error_code 997 jmp error_code
1058 CFI_ENDPROC 998 CFI_ENDPROC
1059END(spurious_interrupt_bug) 999END(spurious_interrupt_bug)
@@ -1084,8 +1024,7 @@ ENTRY(xen_sysenter_target)
1084 1024
1085ENTRY(xen_hypervisor_callback) 1025ENTRY(xen_hypervisor_callback)
1086 CFI_STARTPROC 1026 CFI_STARTPROC
1087 pushl $0 1027 pushl_cfi $0
1088 CFI_ADJUST_CFA_OFFSET 4
1089 SAVE_ALL 1028 SAVE_ALL
1090 TRACE_IRQS_OFF 1029 TRACE_IRQS_OFF
1091 1030
@@ -1121,23 +1060,20 @@ ENDPROC(xen_hypervisor_callback)
1121# We distinguish between categories by maintaining a status value in EAX. 1060# We distinguish between categories by maintaining a status value in EAX.
1122ENTRY(xen_failsafe_callback) 1061ENTRY(xen_failsafe_callback)
1123 CFI_STARTPROC 1062 CFI_STARTPROC
1124 pushl %eax 1063 pushl_cfi %eax
1125 CFI_ADJUST_CFA_OFFSET 4
1126 movl $1,%eax 1064 movl $1,%eax
11271: mov 4(%esp),%ds 10651: mov 4(%esp),%ds
11282: mov 8(%esp),%es 10662: mov 8(%esp),%es
11293: mov 12(%esp),%fs 10673: mov 12(%esp),%fs
11304: mov 16(%esp),%gs 10684: mov 16(%esp),%gs
1131 testl %eax,%eax 1069 testl %eax,%eax
1132 popl %eax 1070 popl_cfi %eax
1133 CFI_ADJUST_CFA_OFFSET -4
1134 lea 16(%esp),%esp 1071 lea 16(%esp),%esp
1135 CFI_ADJUST_CFA_OFFSET -16 1072 CFI_ADJUST_CFA_OFFSET -16
1136 jz 5f 1073 jz 5f
1137 addl $16,%esp 1074 addl $16,%esp
1138 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) 1075 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
11395: pushl $0 # EAX == 0 => Category 1 (Bad segment) 10765: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment)
1140 CFI_ADJUST_CFA_OFFSET 4
1141 SAVE_ALL 1077 SAVE_ALL
1142 jmp ret_from_exception 1078 jmp ret_from_exception
1143 CFI_ENDPROC 1079 CFI_ENDPROC
@@ -1287,40 +1223,29 @@ syscall_table_size=(.-sys_call_table)
1287 1223
1288ENTRY(page_fault) 1224ENTRY(page_fault)
1289 RING0_EC_FRAME 1225 RING0_EC_FRAME
1290 pushl $do_page_fault 1226 pushl_cfi $do_page_fault
1291 CFI_ADJUST_CFA_OFFSET 4
1292 ALIGN 1227 ALIGN
1293error_code: 1228error_code:
1294 /* the function address is in %gs's slot on the stack */ 1229 /* the function address is in %gs's slot on the stack */
1295 pushl %fs 1230 pushl_cfi %fs
1296 CFI_ADJUST_CFA_OFFSET 4
1297 /*CFI_REL_OFFSET fs, 0*/ 1231 /*CFI_REL_OFFSET fs, 0*/
1298 pushl %es 1232 pushl_cfi %es
1299 CFI_ADJUST_CFA_OFFSET 4
1300 /*CFI_REL_OFFSET es, 0*/ 1233 /*CFI_REL_OFFSET es, 0*/
1301 pushl %ds 1234 pushl_cfi %ds
1302 CFI_ADJUST_CFA_OFFSET 4
1303 /*CFI_REL_OFFSET ds, 0*/ 1235 /*CFI_REL_OFFSET ds, 0*/
1304 pushl %eax 1236 pushl_cfi %eax
1305 CFI_ADJUST_CFA_OFFSET 4
1306 CFI_REL_OFFSET eax, 0 1237 CFI_REL_OFFSET eax, 0
1307 pushl %ebp 1238 pushl_cfi %ebp
1308 CFI_ADJUST_CFA_OFFSET 4
1309 CFI_REL_OFFSET ebp, 0 1239 CFI_REL_OFFSET ebp, 0
1310 pushl %edi 1240 pushl_cfi %edi
1311 CFI_ADJUST_CFA_OFFSET 4
1312 CFI_REL_OFFSET edi, 0 1241 CFI_REL_OFFSET edi, 0
1313 pushl %esi 1242 pushl_cfi %esi
1314 CFI_ADJUST_CFA_OFFSET 4
1315 CFI_REL_OFFSET esi, 0 1243 CFI_REL_OFFSET esi, 0
1316 pushl %edx 1244 pushl_cfi %edx
1317 CFI_ADJUST_CFA_OFFSET 4
1318 CFI_REL_OFFSET edx, 0 1245 CFI_REL_OFFSET edx, 0
1319 pushl %ecx 1246 pushl_cfi %ecx
1320 CFI_ADJUST_CFA_OFFSET 4
1321 CFI_REL_OFFSET ecx, 0 1247 CFI_REL_OFFSET ecx, 0
1322 pushl %ebx 1248 pushl_cfi %ebx
1323 CFI_ADJUST_CFA_OFFSET 4
1324 CFI_REL_OFFSET ebx, 0 1249 CFI_REL_OFFSET ebx, 0
1325 cld 1250 cld
1326 movl $(__KERNEL_PERCPU), %ecx 1251 movl $(__KERNEL_PERCPU), %ecx
@@ -1362,12 +1287,9 @@ END(page_fault)
1362 movl TSS_sysenter_sp0 + \offset(%esp), %esp 1287 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1363 CFI_DEF_CFA esp, 0 1288 CFI_DEF_CFA esp, 0
1364 CFI_UNDEFINED eip 1289 CFI_UNDEFINED eip
1365 pushfl 1290 pushfl_cfi
1366 CFI_ADJUST_CFA_OFFSET 4 1291 pushl_cfi $__KERNEL_CS
1367 pushl $__KERNEL_CS 1292 pushl_cfi $sysenter_past_esp
1368 CFI_ADJUST_CFA_OFFSET 4
1369 pushl $sysenter_past_esp
1370 CFI_ADJUST_CFA_OFFSET 4
1371 CFI_REL_OFFSET eip, 0 1293 CFI_REL_OFFSET eip, 0
1372.endm 1294.endm
1373 1295
@@ -1377,8 +1299,7 @@ ENTRY(debug)
1377 jne debug_stack_correct 1299 jne debug_stack_correct
1378 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn 1300 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1379debug_stack_correct: 1301debug_stack_correct:
1380 pushl $-1 # mark this as an int 1302 pushl_cfi $-1 # mark this as an int
1381 CFI_ADJUST_CFA_OFFSET 4
1382 SAVE_ALL 1303 SAVE_ALL
1383 TRACE_IRQS_OFF 1304 TRACE_IRQS_OFF
1384 xorl %edx,%edx # error code 0 1305 xorl %edx,%edx # error code 0
@@ -1398,32 +1319,27 @@ END(debug)
1398 */ 1319 */
1399ENTRY(nmi) 1320ENTRY(nmi)
1400 RING0_INT_FRAME 1321 RING0_INT_FRAME
1401 pushl %eax 1322 pushl_cfi %eax
1402 CFI_ADJUST_CFA_OFFSET 4
1403 movl %ss, %eax 1323 movl %ss, %eax
1404 cmpw $__ESPFIX_SS, %ax 1324 cmpw $__ESPFIX_SS, %ax
1405 popl %eax 1325 popl_cfi %eax
1406 CFI_ADJUST_CFA_OFFSET -4
1407 je nmi_espfix_stack 1326 je nmi_espfix_stack
1408 cmpl $ia32_sysenter_target,(%esp) 1327 cmpl $ia32_sysenter_target,(%esp)
1409 je nmi_stack_fixup 1328 je nmi_stack_fixup
1410 pushl %eax 1329 pushl_cfi %eax
1411 CFI_ADJUST_CFA_OFFSET 4
1412 movl %esp,%eax 1330 movl %esp,%eax
1413 /* Do not access memory above the end of our stack page, 1331 /* Do not access memory above the end of our stack page,
1414 * it might not exist. 1332 * it might not exist.
1415 */ 1333 */
1416 andl $(THREAD_SIZE-1),%eax 1334 andl $(THREAD_SIZE-1),%eax
1417 cmpl $(THREAD_SIZE-20),%eax 1335 cmpl $(THREAD_SIZE-20),%eax
1418 popl %eax 1336 popl_cfi %eax
1419 CFI_ADJUST_CFA_OFFSET -4
1420 jae nmi_stack_correct 1337 jae nmi_stack_correct
1421 cmpl $ia32_sysenter_target,12(%esp) 1338 cmpl $ia32_sysenter_target,12(%esp)
1422 je nmi_debug_stack_check 1339 je nmi_debug_stack_check
1423nmi_stack_correct: 1340nmi_stack_correct:
1424 /* We have a RING0_INT_FRAME here */ 1341 /* We have a RING0_INT_FRAME here */
1425 pushl %eax 1342 pushl_cfi %eax
1426 CFI_ADJUST_CFA_OFFSET 4
1427 SAVE_ALL 1343 SAVE_ALL
1428 xorl %edx,%edx # zero error code 1344 xorl %edx,%edx # zero error code
1429 movl %esp,%eax # pt_regs pointer 1345 movl %esp,%eax # pt_regs pointer
@@ -1452,18 +1368,14 @@ nmi_espfix_stack:
1452 * 1368 *
1453 * create the pointer to lss back 1369 * create the pointer to lss back
1454 */ 1370 */
1455 pushl %ss 1371 pushl_cfi %ss
1456 CFI_ADJUST_CFA_OFFSET 4 1372 pushl_cfi %esp
1457 pushl %esp
1458 CFI_ADJUST_CFA_OFFSET 4
1459 addl $4, (%esp) 1373 addl $4, (%esp)
1460 /* copy the iret frame of 12 bytes */ 1374 /* copy the iret frame of 12 bytes */
1461 .rept 3 1375 .rept 3
1462 pushl 16(%esp) 1376 pushl_cfi 16(%esp)
1463 CFI_ADJUST_CFA_OFFSET 4
1464 .endr 1377 .endr
1465 pushl %eax 1378 pushl_cfi %eax
1466 CFI_ADJUST_CFA_OFFSET 4
1467 SAVE_ALL 1379 SAVE_ALL
1468 FIXUP_ESPFIX_STACK # %eax == %esp 1380 FIXUP_ESPFIX_STACK # %eax == %esp
1469 xorl %edx,%edx # zero error code 1381 xorl %edx,%edx # zero error code
@@ -1477,8 +1389,7 @@ END(nmi)
1477 1389
1478ENTRY(int3) 1390ENTRY(int3)
1479 RING0_INT_FRAME 1391 RING0_INT_FRAME
1480 pushl $-1 # mark this as an int 1392 pushl_cfi $-1 # mark this as an int
1481 CFI_ADJUST_CFA_OFFSET 4
1482 SAVE_ALL 1393 SAVE_ALL
1483 TRACE_IRQS_OFF 1394 TRACE_IRQS_OFF
1484 xorl %edx,%edx # zero error code 1395 xorl %edx,%edx # zero error code
@@ -1490,8 +1401,7 @@ END(int3)
1490 1401
1491ENTRY(general_protection) 1402ENTRY(general_protection)
1492 RING0_EC_FRAME 1403 RING0_EC_FRAME
1493 pushl $do_general_protection 1404 pushl_cfi $do_general_protection
1494 CFI_ADJUST_CFA_OFFSET 4
1495 jmp error_code 1405 jmp error_code
1496 CFI_ENDPROC 1406 CFI_ENDPROC
1497END(general_protection) 1407END(general_protection)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 17be5ec7cbba..a7ae7fd1010f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -213,23 +213,17 @@ ENDPROC(native_usergs_sysret64)
213 .macro FAKE_STACK_FRAME child_rip 213 .macro FAKE_STACK_FRAME child_rip
214 /* push in order ss, rsp, eflags, cs, rip */ 214 /* push in order ss, rsp, eflags, cs, rip */
215 xorl %eax, %eax 215 xorl %eax, %eax
216 pushq $__KERNEL_DS /* ss */ 216 pushq_cfi $__KERNEL_DS /* ss */
217 CFI_ADJUST_CFA_OFFSET 8
218 /*CFI_REL_OFFSET ss,0*/ 217 /*CFI_REL_OFFSET ss,0*/
219 pushq %rax /* rsp */ 218 pushq_cfi %rax /* rsp */
220 CFI_ADJUST_CFA_OFFSET 8
221 CFI_REL_OFFSET rsp,0 219 CFI_REL_OFFSET rsp,0
222 pushq $X86_EFLAGS_IF /* eflags - interrupts on */ 220 pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
223 CFI_ADJUST_CFA_OFFSET 8
224 /*CFI_REL_OFFSET rflags,0*/ 221 /*CFI_REL_OFFSET rflags,0*/
225 pushq $__KERNEL_CS /* cs */ 222 pushq_cfi $__KERNEL_CS /* cs */
226 CFI_ADJUST_CFA_OFFSET 8
227 /*CFI_REL_OFFSET cs,0*/ 223 /*CFI_REL_OFFSET cs,0*/
228 pushq \child_rip /* rip */ 224 pushq_cfi \child_rip /* rip */
229 CFI_ADJUST_CFA_OFFSET 8
230 CFI_REL_OFFSET rip,0 225 CFI_REL_OFFSET rip,0
231 pushq %rax /* orig rax */ 226 pushq_cfi %rax /* orig rax */
232 CFI_ADJUST_CFA_OFFSET 8
233 .endm 227 .endm
234 228
235 .macro UNFAKE_STACK_FRAME 229 .macro UNFAKE_STACK_FRAME
@@ -398,10 +392,8 @@ ENTRY(ret_from_fork)
398 392
399 LOCK ; btr $TIF_FORK,TI_flags(%r8) 393 LOCK ; btr $TIF_FORK,TI_flags(%r8)
400 394
401 push kernel_eflags(%rip) 395 pushq_cfi kernel_eflags(%rip)
402 CFI_ADJUST_CFA_OFFSET 8 396 popfq_cfi # reset kernel eflags
403 popf # reset kernel eflags
404 CFI_ADJUST_CFA_OFFSET -8
405 397
406 call schedule_tail # rdi: 'prev' task parameter 398 call schedule_tail # rdi: 'prev' task parameter
407 399
@@ -521,11 +513,9 @@ sysret_careful:
521 jnc sysret_signal 513 jnc sysret_signal
522 TRACE_IRQS_ON 514 TRACE_IRQS_ON
523 ENABLE_INTERRUPTS(CLBR_NONE) 515 ENABLE_INTERRUPTS(CLBR_NONE)
524 pushq %rdi 516 pushq_cfi %rdi
525 CFI_ADJUST_CFA_OFFSET 8
526 call schedule 517 call schedule
527 popq %rdi 518 popq_cfi %rdi
528 CFI_ADJUST_CFA_OFFSET -8
529 jmp sysret_check 519 jmp sysret_check
530 520
531 /* Handle a signal */ 521 /* Handle a signal */
@@ -634,11 +624,9 @@ int_careful:
634 jnc int_very_careful 624 jnc int_very_careful
635 TRACE_IRQS_ON 625 TRACE_IRQS_ON
636 ENABLE_INTERRUPTS(CLBR_NONE) 626 ENABLE_INTERRUPTS(CLBR_NONE)
637 pushq %rdi 627 pushq_cfi %rdi
638 CFI_ADJUST_CFA_OFFSET 8
639 call schedule 628 call schedule
640 popq %rdi 629 popq_cfi %rdi
641 CFI_ADJUST_CFA_OFFSET -8
642 DISABLE_INTERRUPTS(CLBR_NONE) 630 DISABLE_INTERRUPTS(CLBR_NONE)
643 TRACE_IRQS_OFF 631 TRACE_IRQS_OFF
644 jmp int_with_check 632 jmp int_with_check
@@ -652,12 +640,10 @@ int_check_syscall_exit_work:
652 /* Check for syscall exit trace */ 640 /* Check for syscall exit trace */
653 testl $_TIF_WORK_SYSCALL_EXIT,%edx 641 testl $_TIF_WORK_SYSCALL_EXIT,%edx
654 jz int_signal 642 jz int_signal
655 pushq %rdi 643 pushq_cfi %rdi
656 CFI_ADJUST_CFA_OFFSET 8
657 leaq 8(%rsp),%rdi # &ptregs -> arg1 644 leaq 8(%rsp),%rdi # &ptregs -> arg1
658 call syscall_trace_leave 645 call syscall_trace_leave
659 popq %rdi 646 popq_cfi %rdi
660 CFI_ADJUST_CFA_OFFSET -8
661 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 647 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
662 jmp int_restore_rest 648 jmp int_restore_rest
663 649
@@ -714,9 +700,8 @@ END(ptregscall_common)
714 700
715ENTRY(stub_execve) 701ENTRY(stub_execve)
716 CFI_STARTPROC 702 CFI_STARTPROC
717 popq %r11 703 addq $8, %rsp
718 CFI_ADJUST_CFA_OFFSET -8 704 PARTIAL_FRAME 0
719 CFI_REGISTER rip, r11
720 SAVE_REST 705 SAVE_REST
721 FIXUP_TOP_OF_STACK %r11 706 FIXUP_TOP_OF_STACK %r11
722 movq %rsp, %rcx 707 movq %rsp, %rcx
@@ -735,7 +720,7 @@ END(stub_execve)
735ENTRY(stub_rt_sigreturn) 720ENTRY(stub_rt_sigreturn)
736 CFI_STARTPROC 721 CFI_STARTPROC
737 addq $8, %rsp 722 addq $8, %rsp
738 CFI_ADJUST_CFA_OFFSET -8 723 PARTIAL_FRAME 0
739 SAVE_REST 724 SAVE_REST
740 movq %rsp,%rdi 725 movq %rsp,%rdi
741 FIXUP_TOP_OF_STACK %r11 726 FIXUP_TOP_OF_STACK %r11
@@ -766,8 +751,7 @@ vector=FIRST_EXTERNAL_VECTOR
766 .if vector <> FIRST_EXTERNAL_VECTOR 751 .if vector <> FIRST_EXTERNAL_VECTOR
767 CFI_ADJUST_CFA_OFFSET -8 752 CFI_ADJUST_CFA_OFFSET -8
768 .endif 753 .endif
7691: pushq $(~vector+0x80) /* Note: always in signed byte range */ 7541: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
770 CFI_ADJUST_CFA_OFFSET 8
771 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 755 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
772 jmp 2f 756 jmp 2f
773 .endif 757 .endif
@@ -796,8 +780,8 @@ END(interrupt)
796 780
797/* 0(%rsp): ~(interrupt number) */ 781/* 0(%rsp): ~(interrupt number) */
798 .macro interrupt func 782 .macro interrupt func
799 subq $10*8, %rsp 783 subq $ORIG_RAX-ARGOFFSET+8, %rsp
800 CFI_ADJUST_CFA_OFFSET 10*8 784 CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
801 call save_args 785 call save_args
802 PARTIAL_FRAME 0 786 PARTIAL_FRAME 0
803 call \func 787 call \func
@@ -822,6 +806,7 @@ ret_from_intr:
822 TRACE_IRQS_OFF 806 TRACE_IRQS_OFF
823 decl PER_CPU_VAR(irq_count) 807 decl PER_CPU_VAR(irq_count)
824 leaveq 808 leaveq
809 CFI_RESTORE rbp
825 CFI_DEF_CFA_REGISTER rsp 810 CFI_DEF_CFA_REGISTER rsp
826 CFI_ADJUST_CFA_OFFSET -8 811 CFI_ADJUST_CFA_OFFSET -8
827exit_intr: 812exit_intr:
@@ -903,11 +888,9 @@ retint_careful:
903 jnc retint_signal 888 jnc retint_signal
904 TRACE_IRQS_ON 889 TRACE_IRQS_ON
905 ENABLE_INTERRUPTS(CLBR_NONE) 890 ENABLE_INTERRUPTS(CLBR_NONE)
906 pushq %rdi 891 pushq_cfi %rdi
907 CFI_ADJUST_CFA_OFFSET 8
908 call schedule 892 call schedule
909 popq %rdi 893 popq_cfi %rdi
910 CFI_ADJUST_CFA_OFFSET -8
911 GET_THREAD_INFO(%rcx) 894 GET_THREAD_INFO(%rcx)
912 DISABLE_INTERRUPTS(CLBR_NONE) 895 DISABLE_INTERRUPTS(CLBR_NONE)
913 TRACE_IRQS_OFF 896 TRACE_IRQS_OFF
@@ -956,8 +939,7 @@ END(common_interrupt)
956.macro apicinterrupt num sym do_sym 939.macro apicinterrupt num sym do_sym
957ENTRY(\sym) 940ENTRY(\sym)
958 INTR_FRAME 941 INTR_FRAME
959 pushq $~(\num) 942 pushq_cfi $~(\num)
960 CFI_ADJUST_CFA_OFFSET 8
961 interrupt \do_sym 943 interrupt \do_sym
962 jmp ret_from_intr 944 jmp ret_from_intr
963 CFI_ENDPROC 945 CFI_ENDPROC
@@ -1023,9 +1005,9 @@ apicinterrupt ERROR_APIC_VECTOR \
1023apicinterrupt SPURIOUS_APIC_VECTOR \ 1005apicinterrupt SPURIOUS_APIC_VECTOR \
1024 spurious_interrupt smp_spurious_interrupt 1006 spurious_interrupt smp_spurious_interrupt
1025 1007
1026#ifdef CONFIG_PERF_EVENTS 1008#ifdef CONFIG_IRQ_WORK
1027apicinterrupt LOCAL_PENDING_VECTOR \ 1009apicinterrupt IRQ_WORK_VECTOR \
1028 perf_pending_interrupt smp_perf_pending_interrupt 1010 irq_work_interrupt smp_irq_work_interrupt
1029#endif 1011#endif
1030 1012
1031/* 1013/*
@@ -1036,8 +1018,8 @@ ENTRY(\sym)
1036 INTR_FRAME 1018 INTR_FRAME
1037 PARAVIRT_ADJUST_EXCEPTION_FRAME 1019 PARAVIRT_ADJUST_EXCEPTION_FRAME
1038 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1020 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1039 subq $15*8,%rsp 1021 subq $ORIG_RAX-R15, %rsp
1040 CFI_ADJUST_CFA_OFFSET 15*8 1022 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1041 call error_entry 1023 call error_entry
1042 DEFAULT_FRAME 0 1024 DEFAULT_FRAME 0
1043 movq %rsp,%rdi /* pt_regs pointer */ 1025 movq %rsp,%rdi /* pt_regs pointer */
@@ -1052,9 +1034,9 @@ END(\sym)
1052ENTRY(\sym) 1034ENTRY(\sym)
1053 INTR_FRAME 1035 INTR_FRAME
1054 PARAVIRT_ADJUST_EXCEPTION_FRAME 1036 PARAVIRT_ADJUST_EXCEPTION_FRAME
1055 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1037 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1056 CFI_ADJUST_CFA_OFFSET 8 1038 subq $ORIG_RAX-R15, %rsp
1057 subq $15*8, %rsp 1039 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1058 call save_paranoid 1040 call save_paranoid
1059 TRACE_IRQS_OFF 1041 TRACE_IRQS_OFF
1060 movq %rsp,%rdi /* pt_regs pointer */ 1042 movq %rsp,%rdi /* pt_regs pointer */
@@ -1070,9 +1052,9 @@ END(\sym)
1070ENTRY(\sym) 1052ENTRY(\sym)
1071 INTR_FRAME 1053 INTR_FRAME
1072 PARAVIRT_ADJUST_EXCEPTION_FRAME 1054 PARAVIRT_ADJUST_EXCEPTION_FRAME
1073 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1055 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1074 CFI_ADJUST_CFA_OFFSET 8 1056 subq $ORIG_RAX-R15, %rsp
1075 subq $15*8, %rsp 1057 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1076 call save_paranoid 1058 call save_paranoid
1077 TRACE_IRQS_OFF 1059 TRACE_IRQS_OFF
1078 movq %rsp,%rdi /* pt_regs pointer */ 1060 movq %rsp,%rdi /* pt_regs pointer */
@@ -1089,8 +1071,8 @@ END(\sym)
1089ENTRY(\sym) 1071ENTRY(\sym)
1090 XCPT_FRAME 1072 XCPT_FRAME
1091 PARAVIRT_ADJUST_EXCEPTION_FRAME 1073 PARAVIRT_ADJUST_EXCEPTION_FRAME
1092 subq $15*8,%rsp 1074 subq $ORIG_RAX-R15, %rsp
1093 CFI_ADJUST_CFA_OFFSET 15*8 1075 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1094 call error_entry 1076 call error_entry
1095 DEFAULT_FRAME 0 1077 DEFAULT_FRAME 0
1096 movq %rsp,%rdi /* pt_regs pointer */ 1078 movq %rsp,%rdi /* pt_regs pointer */
@@ -1107,8 +1089,8 @@ END(\sym)
1107ENTRY(\sym) 1089ENTRY(\sym)
1108 XCPT_FRAME 1090 XCPT_FRAME
1109 PARAVIRT_ADJUST_EXCEPTION_FRAME 1091 PARAVIRT_ADJUST_EXCEPTION_FRAME
1110 subq $15*8,%rsp 1092 subq $ORIG_RAX-R15, %rsp
1111 CFI_ADJUST_CFA_OFFSET 15*8 1093 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1112 call save_paranoid 1094 call save_paranoid
1113 DEFAULT_FRAME 0 1095 DEFAULT_FRAME 0
1114 TRACE_IRQS_OFF 1096 TRACE_IRQS_OFF
@@ -1139,16 +1121,14 @@ zeroentry simd_coprocessor_error do_simd_coprocessor_error
1139 /* edi: new selector */ 1121 /* edi: new selector */
1140ENTRY(native_load_gs_index) 1122ENTRY(native_load_gs_index)
1141 CFI_STARTPROC 1123 CFI_STARTPROC
1142 pushf 1124 pushfq_cfi
1143 CFI_ADJUST_CFA_OFFSET 8
1144 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) 1125 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1145 SWAPGS 1126 SWAPGS
1146gs_change: 1127gs_change:
1147 movl %edi,%gs 1128 movl %edi,%gs
11482: mfence /* workaround */ 11292: mfence /* workaround */
1149 SWAPGS 1130 SWAPGS
1150 popf 1131 popfq_cfi
1151 CFI_ADJUST_CFA_OFFSET -8
1152 ret 1132 ret
1153 CFI_ENDPROC 1133 CFI_ENDPROC
1154END(native_load_gs_index) 1134END(native_load_gs_index)
@@ -1215,8 +1195,7 @@ END(kernel_execve)
1215/* Call softirq on interrupt stack. Interrupts are off. */ 1195/* Call softirq on interrupt stack. Interrupts are off. */
1216ENTRY(call_softirq) 1196ENTRY(call_softirq)
1217 CFI_STARTPROC 1197 CFI_STARTPROC
1218 push %rbp 1198 pushq_cfi %rbp
1219 CFI_ADJUST_CFA_OFFSET 8
1220 CFI_REL_OFFSET rbp,0 1199 CFI_REL_OFFSET rbp,0
1221 mov %rsp,%rbp 1200 mov %rsp,%rbp
1222 CFI_DEF_CFA_REGISTER rbp 1201 CFI_DEF_CFA_REGISTER rbp
@@ -1225,6 +1204,7 @@ ENTRY(call_softirq)
1225 push %rbp # backlink for old unwinder 1204 push %rbp # backlink for old unwinder
1226 call __do_softirq 1205 call __do_softirq
1227 leaveq 1206 leaveq
1207 CFI_RESTORE rbp
1228 CFI_DEF_CFA_REGISTER rsp 1208 CFI_DEF_CFA_REGISTER rsp
1229 CFI_ADJUST_CFA_OFFSET -8 1209 CFI_ADJUST_CFA_OFFSET -8
1230 decl PER_CPU_VAR(irq_count) 1210 decl PER_CPU_VAR(irq_count)
@@ -1368,7 +1348,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
1368 1348
1369 /* ebx: no swapgs flag */ 1349 /* ebx: no swapgs flag */
1370ENTRY(paranoid_exit) 1350ENTRY(paranoid_exit)
1371 INTR_FRAME 1351 DEFAULT_FRAME
1372 DISABLE_INTERRUPTS(CLBR_NONE) 1352 DISABLE_INTERRUPTS(CLBR_NONE)
1373 TRACE_IRQS_OFF 1353 TRACE_IRQS_OFF
1374 testl %ebx,%ebx /* swapgs needed? */ 1354 testl %ebx,%ebx /* swapgs needed? */
@@ -1445,7 +1425,6 @@ error_swapgs:
1445error_sti: 1425error_sti:
1446 TRACE_IRQS_OFF 1426 TRACE_IRQS_OFF
1447 ret 1427 ret
1448 CFI_ENDPROC
1449 1428
1450/* 1429/*
1451 * There are two places in the kernel that can potentially fault with 1430 * There are two places in the kernel that can potentially fault with
@@ -1470,6 +1449,7 @@ bstep_iret:
1470 /* Fix truncated RIP */ 1449 /* Fix truncated RIP */
1471 movq %rcx,RIP+8(%rsp) 1450 movq %rcx,RIP+8(%rsp)
1472 jmp error_swapgs 1451 jmp error_swapgs
1452 CFI_ENDPROC
1473END(error_entry) 1453END(error_entry)
1474 1454
1475 1455
@@ -1498,8 +1478,8 @@ ENTRY(nmi)
1498 INTR_FRAME 1478 INTR_FRAME
1499 PARAVIRT_ADJUST_EXCEPTION_FRAME 1479 PARAVIRT_ADJUST_EXCEPTION_FRAME
1500 pushq_cfi $-1 1480 pushq_cfi $-1
1501 subq $15*8, %rsp 1481 subq $ORIG_RAX-R15, %rsp
1502 CFI_ADJUST_CFA_OFFSET 15*8 1482 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1503 call save_paranoid 1483 call save_paranoid
1504 DEFAULT_FRAME 0 1484 DEFAULT_FRAME 0
1505 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1485 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cd37469b54ee..3afb33f14d2d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
257 return mod_code_status; 257 return mod_code_status;
258} 258}
259 259
260
261
262
263static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
264
265static unsigned char *ftrace_nop_replace(void) 260static unsigned char *ftrace_nop_replace(void)
266{ 261{
267 return ftrace_nop; 262 return ideal_nop5;
268} 263}
269 264
270static int 265static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
338 333
339int __init ftrace_dyn_arch_init(void *data) 334int __init ftrace_dyn_arch_init(void *data)
340{ 335{
341 extern const unsigned char ftrace_test_p6nop[];
342 extern const unsigned char ftrace_test_nop5[];
343 extern const unsigned char ftrace_test_jmp[];
344 int faulted = 0;
345
346 /*
347 * There is no good nop for all x86 archs.
348 * We will default to using the P6_NOP5, but first we
349 * will test to make sure that the nop will actually
350 * work on this CPU. If it faults, we will then
351 * go to a lesser efficient 5 byte nop. If that fails
352 * we then just use a jmp as our nop. This isn't the most
353 * efficient nop, but we can not use a multi part nop
354 * since we would then risk being preempted in the middle
355 * of that nop, and if we enabled tracing then, it might
356 * cause a system crash.
357 *
358 * TODO: check the cpuid to determine the best nop.
359 */
360 asm volatile (
361 "ftrace_test_jmp:"
362 "jmp ftrace_test_p6nop\n"
363 "nop\n"
364 "nop\n"
365 "nop\n" /* 2 byte jmp + 3 bytes */
366 "ftrace_test_p6nop:"
367 P6_NOP5
368 "jmp 1f\n"
369 "ftrace_test_nop5:"
370 ".byte 0x66,0x66,0x66,0x66,0x90\n"
371 "1:"
372 ".section .fixup, \"ax\"\n"
373 "2: movl $1, %0\n"
374 " jmp ftrace_test_nop5\n"
375 "3: movl $2, %0\n"
376 " jmp 1b\n"
377 ".previous\n"
378 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
379 _ASM_EXTABLE(ftrace_test_nop5, 3b)
380 : "=r"(faulted) : "0" (faulted));
381
382 switch (faulted) {
383 case 0:
384 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
385 memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
386 break;
387 case 1:
388 pr_info("converting mcount calls to 66 66 66 66 90\n");
389 memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
390 break;
391 case 2:
392 pr_info("converting mcount calls to jmp . + 5\n");
393 memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
394 break;
395 }
396
 397 /* The return code is returned via data */ 336 /* The return code is returned via data */
398 *(unsigned long *)data = 0; 337 *(unsigned long *)data = 0;
399 338
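The hunk above drops ftrace's private boot-time NOP probing and simply reuses ideal_nop5, the 5-byte NOP picked once by the alternatives code. As a rough user-space illustration of the byte swap itself (not the kernel's text_poke/stop_machine machinery; the 0f 1f 44 00 00 P6 NOP is hard-coded here only as an assumption for the sketch):

    #include <stdio.h>
    #include <string.h>

    #define MCOUNT_INSN_SIZE 5

    /* stand-in for ideal_nop5: the 5-byte P6 NOP mentioned in the removed code */
    static const unsigned char nop5[MCOUNT_INSN_SIZE] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

    /* replace a 5-byte call site with the NOP after verifying it still holds
     * the bytes we expect (mirrors the verify step in ftrace_modify_code) */
    static int patch_to_nop(unsigned char *ip, const unsigned char *expected)
    {
        if (memcmp(ip, expected, MCOUNT_INSN_SIZE) != 0)
            return -1;              /* site already changed, refuse to patch */
        memcpy(ip, nop5, MCOUNT_INSN_SIZE);
        return 0;
    }

    int main(void)
    {
        unsigned char site[MCOUNT_INSN_SIZE] = { 0xe8, 0x12, 0x34, 0x56, 0x78 }; /* call rel32 */
        unsigned char old[MCOUNT_INSN_SIZE];

        memcpy(old, site, sizeof(old));
        printf("patched: %d\n", patch_to_nop(site, old));
        return 0;
    }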
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 7494999141b3..efaf906daf93 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -440,9 +440,9 @@ static int hpet_legacy_next_event(unsigned long delta,
440static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); 440static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
441static struct hpet_dev *hpet_devs; 441static struct hpet_dev *hpet_devs;
442 442
443void hpet_msi_unmask(unsigned int irq) 443void hpet_msi_unmask(struct irq_data *data)
444{ 444{
445 struct hpet_dev *hdev = get_irq_data(irq); 445 struct hpet_dev *hdev = data->handler_data;
446 unsigned int cfg; 446 unsigned int cfg;
447 447
448 /* unmask it */ 448 /* unmask it */
@@ -451,10 +451,10 @@ void hpet_msi_unmask(unsigned int irq)
451 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 451 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
452} 452}
453 453
454void hpet_msi_mask(unsigned int irq) 454void hpet_msi_mask(struct irq_data *data)
455{ 455{
456 struct hpet_dev *hdev = data->handler_data;
456 unsigned int cfg; 457 unsigned int cfg;
457 struct hpet_dev *hdev = get_irq_data(irq);
458 458
459 /* mask it */ 459 /* mask it */
460 cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); 460 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
@@ -462,18 +462,14 @@ void hpet_msi_mask(unsigned int irq)
462 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 462 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
463} 463}
464 464
465void hpet_msi_write(unsigned int irq, struct msi_msg *msg) 465void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
466{ 466{
467 struct hpet_dev *hdev = get_irq_data(irq);
468
469 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); 467 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
470 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); 468 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
471} 469}
472 470
473void hpet_msi_read(unsigned int irq, struct msi_msg *msg) 471void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
474{ 472{
475 struct hpet_dev *hdev = get_irq_data(irq);
476
477 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); 473 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
478 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); 474 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
479 msg->address_hi = 0; 475 msg->address_hi = 0;
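The HPET MSI callbacks above move from the irq-number interface to struct irq_data, with the per-timer hpet_dev carried in data->handler_data, and the read/write helpers now take the hpet_dev directly. A minimal sketch of that callback pattern for a hypothetical device (my_dev, my_irq_mask and the register layout are illustrative, not part of this patch):

    #include <linux/irq.h>
    #include <linux/io.h>

    struct my_dev {
        void __iomem *regs;
        unsigned int num;
    };

    /* new-style callback: the IRQ core hands us irq_data, and the driver's
     * per-interrupt cookie was stored in handler_data at setup time */
    static void my_irq_mask(struct irq_data *data)
    {
        struct my_dev *dev = data->handler_data;
        u32 cfg = readl(dev->regs + 4 * dev->num);

        writel(cfg & ~0x1, dev->regs + 4 * dev->num);
    }

    static struct irq_chip my_chip = {
        .name     = "my-chip",
        .irq_mask = my_irq_mask,
    };

At setup time the cookie would be attached to the descriptor (via set_irq_data() in this era of the tree) before the chip and flow handler are installed.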
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index a46cb3522c0c..58bb239a2fd7 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -68,19 +68,22 @@ static void __cpuinit init_thread_xstate(void)
68 */ 68 */
69 69
70 if (!HAVE_HWFP) { 70 if (!HAVE_HWFP) {
71 /*
72 * Disable xsave as we do not support it if i387
73 * emulation is enabled.
74 */
75 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
76 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
71 xstate_size = sizeof(struct i387_soft_struct); 77 xstate_size = sizeof(struct i387_soft_struct);
72 return; 78 return;
73 } 79 }
74 80
75 if (cpu_has_fxsr) 81 if (cpu_has_fxsr)
76 xstate_size = sizeof(struct i387_fxsave_struct); 82 xstate_size = sizeof(struct i387_fxsave_struct);
77#ifdef CONFIG_X86_32
78 else 83 else
79 xstate_size = sizeof(struct i387_fsave_struct); 84 xstate_size = sizeof(struct i387_fsave_struct);
80#endif
81} 85}
82 86
83#ifdef CONFIG_X86_64
84/* 87/*
85 * Called at bootup to set up the initial FPU state that is later cloned 88 * Called at bootup to set up the initial FPU state that is later cloned
86 * into all processes. 89 * into all processes.
@@ -88,12 +91,21 @@ static void __cpuinit init_thread_xstate(void)
88 91
89void __cpuinit fpu_init(void) 92void __cpuinit fpu_init(void)
90{ 93{
91 unsigned long oldcr0 = read_cr0(); 94 unsigned long cr0;
92 95 unsigned long cr4_mask = 0;
93 set_in_cr4(X86_CR4_OSFXSR);
94 set_in_cr4(X86_CR4_OSXMMEXCPT);
95 96
96 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 97 if (cpu_has_fxsr)
98 cr4_mask |= X86_CR4_OSFXSR;
99 if (cpu_has_xmm)
100 cr4_mask |= X86_CR4_OSXMMEXCPT;
101 if (cr4_mask)
102 set_in_cr4(cr4_mask);
103
104 cr0 = read_cr0();
105 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
106 if (!HAVE_HWFP)
107 cr0 |= X86_CR0_EM;
108 write_cr0(cr0);
97 109
98 if (!smp_processor_id()) 110 if (!smp_processor_id())
99 init_thread_xstate(); 111 init_thread_xstate();
@@ -104,24 +116,12 @@ void __cpuinit fpu_init(void)
104 clear_used_math(); 116 clear_used_math();
105} 117}
106 118
107#else /* CONFIG_X86_64 */
108
109void __cpuinit fpu_init(void)
110{
111 if (!smp_processor_id())
112 init_thread_xstate();
113}
114
115#endif /* CONFIG_X86_32 */
116
117void fpu_finit(struct fpu *fpu) 119void fpu_finit(struct fpu *fpu)
118{ 120{
119#ifdef CONFIG_X86_32
120 if (!HAVE_HWFP) { 121 if (!HAVE_HWFP) {
121 finit_soft_fpu(&fpu->state->soft); 122 finit_soft_fpu(&fpu->state->soft);
122 return; 123 return;
123 } 124 }
124#endif
125 125
126 if (cpu_has_fxsr) { 126 if (cpu_has_fxsr) {
127 struct i387_fxsave_struct *fx = &fpu->state->fxsave; 127 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
@@ -386,19 +386,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
386#ifdef CONFIG_X86_64 386#ifdef CONFIG_X86_64
387 env->fip = fxsave->rip; 387 env->fip = fxsave->rip;
388 env->foo = fxsave->rdp; 388 env->foo = fxsave->rdp;
389 /*
390 * should be actually ds/cs at fpu exception time, but
391 * that information is not available in 64bit mode.
392 */
393 env->fcs = task_pt_regs(tsk)->cs;
389 if (tsk == current) { 394 if (tsk == current) {
390 /* 395 savesegment(ds, env->fos);
391 * should be actually ds/cs at fpu exception time, but
392 * that information is not available in 64bit mode.
393 */
394 asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
395 asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
396 } else { 396 } else {
397 struct pt_regs *regs = task_pt_regs(tsk); 397 env->fos = tsk->thread.ds;
398
399 env->fos = 0xffff0000 | tsk->thread.ds;
400 env->fcs = regs->cs;
401 } 398 }
399 env->fos |= 0xffff0000;
402#else 400#else
403 env->fip = fxsave->fip; 401 env->fip = fxsave->fip;
404 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); 402 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
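convert_from_fxsr now reads %ds with savesegment() only for the current task, takes the saved thread.ds otherwise, and ORs in the 0xffff0000 pattern in a single place. The same logic, pulled out into a fragment (read_fos is a hypothetical helper; the header carrying savesegment() in this era is assumed to be asm/system.h):

    #include <linux/sched.h>
    #include <asm/system.h>      /* savesegment(); location assumed for this kernel era */

    static u32 read_fos(struct task_struct *tsk)
    {
        u32 fos;

        if (tsk == current)
            savesegment(ds, fos);        /* mov %ds into fos */
        else
            fos = tsk->thread.ds;        /* use the value saved at context switch */

        return fos | 0xffff0000;         /* upper half is always set */
    }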
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index cafa7c80ac95..20757cb2efa3 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -29,24 +29,10 @@
29 * plus some generic x86 specific things if generic specifics makes 29 * plus some generic x86 specific things if generic specifics makes
30 * any sense at all. 30 * any sense at all.
31 */ 31 */
32static void init_8259A(int auto_eoi);
32 33
33static int i8259A_auto_eoi; 34static int i8259A_auto_eoi;
34DEFINE_RAW_SPINLOCK(i8259A_lock); 35DEFINE_RAW_SPINLOCK(i8259A_lock);
35static void mask_and_ack_8259A(unsigned int);
36static void mask_8259A(void);
37static void unmask_8259A(void);
38static void disable_8259A_irq(unsigned int irq);
39static void enable_8259A_irq(unsigned int irq);
40static void init_8259A(int auto_eoi);
41static int i8259A_irq_pending(unsigned int irq);
42
43struct irq_chip i8259A_chip = {
44 .name = "XT-PIC",
45 .mask = disable_8259A_irq,
46 .disable = disable_8259A_irq,
47 .unmask = enable_8259A_irq,
48 .mask_ack = mask_and_ack_8259A,
49};
50 36
51/* 37/*
52 * 8259A PIC functions to handle ISA devices: 38 * 8259A PIC functions to handle ISA devices:
@@ -68,7 +54,7 @@ unsigned int cached_irq_mask = 0xffff;
68 */ 54 */
69unsigned long io_apic_irqs; 55unsigned long io_apic_irqs;
70 56
71static void disable_8259A_irq(unsigned int irq) 57static void mask_8259A_irq(unsigned int irq)
72{ 58{
73 unsigned int mask = 1 << irq; 59 unsigned int mask = 1 << irq;
74 unsigned long flags; 60 unsigned long flags;
@@ -82,7 +68,12 @@ static void disable_8259A_irq(unsigned int irq)
82 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 68 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
83} 69}
84 70
85static void enable_8259A_irq(unsigned int irq) 71static void disable_8259A_irq(struct irq_data *data)
72{
73 mask_8259A_irq(data->irq);
74}
75
76static void unmask_8259A_irq(unsigned int irq)
86{ 77{
87 unsigned int mask = ~(1 << irq); 78 unsigned int mask = ~(1 << irq);
88 unsigned long flags; 79 unsigned long flags;
@@ -96,6 +87,11 @@ static void enable_8259A_irq(unsigned int irq)
96 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 87 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
97} 88}
98 89
90static void enable_8259A_irq(struct irq_data *data)
91{
92 unmask_8259A_irq(data->irq);
93}
94
99static int i8259A_irq_pending(unsigned int irq) 95static int i8259A_irq_pending(unsigned int irq)
100{ 96{
101 unsigned int mask = 1<<irq; 97 unsigned int mask = 1<<irq;
@@ -117,7 +113,7 @@ static void make_8259A_irq(unsigned int irq)
117 disable_irq_nosync(irq); 113 disable_irq_nosync(irq);
118 io_apic_irqs &= ~(1<<irq); 114 io_apic_irqs &= ~(1<<irq);
119 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, 115 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
120 "XT"); 116 i8259A_chip.name);
121 enable_irq(irq); 117 enable_irq(irq);
122} 118}
123 119
@@ -150,8 +146,9 @@ static inline int i8259A_irq_real(unsigned int irq)
150 * first, _then_ send the EOI, and the order of EOI 146 * first, _then_ send the EOI, and the order of EOI
151 * to the two 8259s is important! 147 * to the two 8259s is important!
152 */ 148 */
153static void mask_and_ack_8259A(unsigned int irq) 149static void mask_and_ack_8259A(struct irq_data *data)
154{ 150{
151 unsigned int irq = data->irq;
155 unsigned int irqmask = 1 << irq; 152 unsigned int irqmask = 1 << irq;
156 unsigned long flags; 153 unsigned long flags;
157 154
@@ -223,6 +220,14 @@ spurious_8259A_irq:
223 } 220 }
224} 221}
225 222
223struct irq_chip i8259A_chip = {
224 .name = "XT-PIC",
225 .irq_mask = disable_8259A_irq,
226 .irq_disable = disable_8259A_irq,
227 .irq_unmask = enable_8259A_irq,
228 .irq_mask_ack = mask_and_ack_8259A,
229};
230
226static char irq_trigger[2]; 231static char irq_trigger[2];
227/** 232/**
228 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ 233 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
@@ -342,9 +347,9 @@ static void init_8259A(int auto_eoi)
342 * In AEOI mode we just have to mask the interrupt 347 * In AEOI mode we just have to mask the interrupt
343 * when acking. 348 * when acking.
344 */ 349 */
345 i8259A_chip.mask_ack = disable_8259A_irq; 350 i8259A_chip.irq_mask_ack = disable_8259A_irq;
346 else 351 else
347 i8259A_chip.mask_ack = mask_and_ack_8259A; 352 i8259A_chip.irq_mask_ack = mask_and_ack_8259A;
348 353
349 udelay(100); /* wait for 8259A to initialize */ 354 udelay(100); /* wait for 8259A to initialize */
350 355
@@ -363,14 +368,6 @@ static void init_8259A(int auto_eoi)
363static void legacy_pic_noop(void) { }; 368static void legacy_pic_noop(void) { };
364static void legacy_pic_uint_noop(unsigned int unused) { }; 369static void legacy_pic_uint_noop(unsigned int unused) { };
365static void legacy_pic_int_noop(int unused) { }; 370static void legacy_pic_int_noop(int unused) { };
366
367static struct irq_chip dummy_pic_chip = {
368 .name = "dummy pic",
369 .mask = legacy_pic_uint_noop,
370 .unmask = legacy_pic_uint_noop,
371 .disable = legacy_pic_uint_noop,
372 .mask_ack = legacy_pic_uint_noop,
373};
374static int legacy_pic_irq_pending_noop(unsigned int irq) 371static int legacy_pic_irq_pending_noop(unsigned int irq)
375{ 372{
376 return 0; 373 return 0;
@@ -378,7 +375,9 @@ static int legacy_pic_irq_pending_noop(unsigned int irq)
378 375
379struct legacy_pic null_legacy_pic = { 376struct legacy_pic null_legacy_pic = {
380 .nr_legacy_irqs = 0, 377 .nr_legacy_irqs = 0,
381 .chip = &dummy_pic_chip, 378 .chip = &dummy_irq_chip,
379 .mask = legacy_pic_uint_noop,
380 .unmask = legacy_pic_uint_noop,
382 .mask_all = legacy_pic_noop, 381 .mask_all = legacy_pic_noop,
383 .restore_mask = legacy_pic_noop, 382 .restore_mask = legacy_pic_noop,
384 .init = legacy_pic_int_noop, 383 .init = legacy_pic_int_noop,
@@ -389,7 +388,9 @@ struct legacy_pic null_legacy_pic = {
389struct legacy_pic default_legacy_pic = { 388struct legacy_pic default_legacy_pic = {
390 .nr_legacy_irqs = NR_IRQS_LEGACY, 389 .nr_legacy_irqs = NR_IRQS_LEGACY,
391 .chip = &i8259A_chip, 390 .chip = &i8259A_chip,
392 .mask_all = mask_8259A, 391 .mask = mask_8259A_irq,
392 .unmask = unmask_8259A_irq,
393 .mask_all = mask_8259A,
393 .restore_mask = unmask_8259A, 394 .restore_mask = unmask_8259A,
394 .init = init_8259A, 395 .init = init_8259A,
395 .irq_pending = i8259A_irq_pending, 396 .irq_pending = i8259A_irq_pending,
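The i8259 conversion keeps the old mask/unmask helpers keyed by IRQ number and adds thin struct irq_data wrappers for the chip methods, so legacy_pic can still call the raw helpers while the generic IRQ layer uses the new signatures. The same two-layer shape, reduced to a skeleton with placeholder names:

    #include <linux/irq.h>

    static void hw_mask(unsigned int irq)   { /* mask 'irq' at the controller */ }
    static void hw_unmask(unsigned int irq) { /* unmask 'irq' at the controller */ }

    /* struct irq_data wrappers: these are what the generic IRQ layer calls */
    static void demo_irq_mask(struct irq_data *data)   { hw_mask(data->irq); }
    static void demo_irq_unmask(struct irq_data *data) { hw_unmask(data->irq); }

    static struct irq_chip demo_chip = {
        .name         = "demo-pic",
        .irq_mask     = demo_irq_mask,
        .irq_unmask   = demo_irq_unmask,
        .irq_mask_ack = demo_irq_mask,
    };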
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 91fd0c70a18a..83ec0175f986 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
67 for_each_online_cpu(j) 67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance monitoring interrupts\n"); 69 seq_printf(p, " Performance monitoring interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND"); 70 seq_printf(p, "%*s: ", prec, "IWI");
71 for_each_online_cpu(j) 71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); 72 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
73 seq_printf(p, " Performance pending work\n"); 73 seq_printf(p, " IRQ work interrupts\n");
74#endif 74#endif
75 if (x86_platform_ipi_callback) { 75 if (x86_platform_ipi_callback) {
76 seq_printf(p, "%*s: ", prec, "PLT"); 76 seq_printf(p, "%*s: ", prec, "PLT");
@@ -159,7 +159,7 @@ int show_interrupts(struct seq_file *p, void *v)
159 seq_printf(p, "%*d: ", prec, i); 159 seq_printf(p, "%*d: ", prec, i);
160 for_each_online_cpu(j) 160 for_each_online_cpu(j)
161 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 161 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
162 seq_printf(p, " %8s", desc->chip->name); 162 seq_printf(p, " %8s", desc->irq_data.chip->name);
163 seq_printf(p, "-%-8s", desc->name); 163 seq_printf(p, "-%-8s", desc->name);
164 164
165 if (action) { 165 if (action) {
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
185 sum += irq_stats(cpu)->apic_timer_irqs; 185 sum += irq_stats(cpu)->apic_timer_irqs;
186 sum += irq_stats(cpu)->irq_spurious_count; 186 sum += irq_stats(cpu)->irq_spurious_count;
187 sum += irq_stats(cpu)->apic_perf_irqs; 187 sum += irq_stats(cpu)->apic_perf_irqs;
188 sum += irq_stats(cpu)->apic_pending_irqs; 188 sum += irq_stats(cpu)->apic_irq_work_irqs;
189#endif 189#endif
190 if (x86_platform_ipi_callback) 190 if (x86_platform_ipi_callback)
191 sum += irq_stats(cpu)->x86_platform_ipis; 191 sum += irq_stats(cpu)->x86_platform_ipis;
@@ -282,6 +282,7 @@ void fixup_irqs(void)
282 unsigned int irq, vector; 282 unsigned int irq, vector;
283 static int warned; 283 static int warned;
284 struct irq_desc *desc; 284 struct irq_desc *desc;
285 struct irq_data *data;
285 286
286 for_each_irq_desc(irq, desc) { 287 for_each_irq_desc(irq, desc) {
287 int break_affinity = 0; 288 int break_affinity = 0;
@@ -296,7 +297,8 @@ void fixup_irqs(void)
296 /* interrupt's are disabled at this point */ 297 /* interrupt's are disabled at this point */
297 raw_spin_lock(&desc->lock); 298 raw_spin_lock(&desc->lock);
298 299
299 affinity = desc->affinity; 300 data = &desc->irq_data;
301 affinity = data->affinity;
300 if (!irq_has_action(irq) || 302 if (!irq_has_action(irq) ||
301 cpumask_equal(affinity, cpu_online_mask)) { 303 cpumask_equal(affinity, cpu_online_mask)) {
302 raw_spin_unlock(&desc->lock); 304 raw_spin_unlock(&desc->lock);
@@ -315,16 +317,16 @@ void fixup_irqs(void)
315 affinity = cpu_all_mask; 317 affinity = cpu_all_mask;
316 } 318 }
317 319
318 if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) 320 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
319 desc->chip->mask(irq); 321 data->chip->irq_mask(data);
320 322
321 if (desc->chip->set_affinity) 323 if (data->chip->irq_set_affinity)
322 desc->chip->set_affinity(irq, affinity); 324 data->chip->irq_set_affinity(data, affinity, true);
323 else if (!(warned++)) 325 else if (!(warned++))
324 set_affinity = 0; 326 set_affinity = 0;
325 327
326 if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) 328 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
327 desc->chip->unmask(irq); 329 data->chip->irq_unmask(data);
328 330
329 raw_spin_unlock(&desc->lock); 331 raw_spin_unlock(&desc->lock);
330 332
@@ -355,10 +357,10 @@ void fixup_irqs(void)
355 if (irr & (1 << (vector % 32))) { 357 if (irr & (1 << (vector % 32))) {
356 irq = __get_cpu_var(vector_irq)[vector]; 358 irq = __get_cpu_var(vector_irq)[vector];
357 359
358 desc = irq_to_desc(irq); 360 data = irq_get_irq_data(irq);
359 raw_spin_lock(&desc->lock); 361 raw_spin_lock(&desc->lock);
360 if (desc->chip->retrigger) 362 if (data->chip->irq_retrigger)
361 desc->chip->retrigger(irq); 363 data->chip->irq_retrigger(data);
362 raw_spin_unlock(&desc->lock); 364 raw_spin_unlock(&desc->lock);
363 } 365 }
364 } 366 }
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
new file mode 100644
index 000000000000..ca8f703a1e70
--- /dev/null
+++ b/arch/x86/kernel/irq_work.c
@@ -0,0 +1,30 @@
1/*
2 * x86 specific code for irq_work
3 *
4 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 */
6
7#include <linux/kernel.h>
8#include <linux/irq_work.h>
9#include <linux/hardirq.h>
10#include <asm/apic.h>
11
12void smp_irq_work_interrupt(struct pt_regs *regs)
13{
14 irq_enter();
15 ack_APIC_irq();
16 inc_irq_stat(apic_irq_work_irqs);
17 irq_work_run();
18 irq_exit();
19}
20
21void arch_irq_work_raise(void)
22{
23#ifdef CONFIG_X86_LOCAL_APIC
24 if (!cpu_has_apic)
25 return;
26
27 apic->send_IPI_self(IRQ_WORK_VECTOR);
28 apic_wait_icr_idle();
29#endif
30}
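The new irq_work.c only wires the x86 self-IPI vector to the generic irq_work layer; the work items themselves come from users elsewhere in the tree. A hedged sketch of such a user (the init_irq_work() initializer is an assumption about the generic API of this era, not something shown in this patch):

    #include <linux/kernel.h>
    #include <linux/irq_work.h>

    /* runs later in hard-irq context, raised via IRQ_WORK_VECTOR above */
    static void my_work_func(struct irq_work *work)
    {
        pr_info("deferred out of NMI context\n");
    }

    static struct irq_work my_work;

    static void my_setup(void)
    {
        init_irq_work(&my_work, my_work_func);   /* assumed generic helper */
    }

    /* e.g. from an NMI handler, where almost nothing else is safe to call */
    static void from_nmi(void)
    {
        irq_work_queue(&my_work);   /* arch_irq_work_raise() sends the self-IPI */
    }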
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 990ae7cfc578..c752e973958d 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -100,6 +100,8 @@ int vector_used_by_percpu_irq(unsigned int vector)
100 100
101void __init init_ISA_irqs(void) 101void __init init_ISA_irqs(void)
102{ 102{
103 struct irq_chip *chip = legacy_pic->chip;
104 const char *name = chip->name;
103 int i; 105 int i;
104 106
105#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 107#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
@@ -107,19 +109,8 @@ void __init init_ISA_irqs(void)
107#endif 109#endif
108 legacy_pic->init(0); 110 legacy_pic->init(0);
109 111
110 /* 112 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
111 * 16 old-style INTA-cycle interrupts: 113 set_irq_chip_and_handler_name(i, chip, handle_level_irq, name);
112 */
113 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
114 struct irq_desc *desc = irq_to_desc(i);
115
116 desc->status = IRQ_DISABLED;
117 desc->action = NULL;
118 desc->depth = 1;
119
120 set_irq_chip_and_handler_name(i, &i8259A_chip,
121 handle_level_irq, "XT");
122 }
123} 114}
124 115
125void __init init_IRQ(void) 116void __init init_IRQ(void)
@@ -224,9 +215,9 @@ static void __init apic_intr_init(void)
224 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 215 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
225 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 216 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
226 217
227 /* Performance monitoring interrupts: */ 218 /* IRQ work interrupts: */
228# ifdef CONFIG_PERF_EVENTS 219# ifdef CONFIG_IRQ_WORK
229 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); 220 alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
230# endif 221# endif
231 222
232#endif 223#endif
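init_ISA_irqs now leaves descriptor state to the core and only binds each legacy IRQ to the PIC's chip. The per-IRQ registration it relies on, shown in isolation (bind_legacy_irqs is illustrative; 'chip' corresponds to legacy_pic->chip in the hunk above):

    #include <linux/irq.h>

    static void bind_legacy_irqs(struct irq_chip *chip, int nr)
    {
        int i;

        /* level-triggered flow handler for the 16 old-style INTA-cycle IRQs */
        for (i = 0; i < nr; i++)
            set_irq_chip_and_handler_name(i, chip, handle_level_irq,
                                          chip->name);
    }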
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644
index 000000000000..961b6b30ba90
--- /dev/null
+++ b/arch/x86/kernel/jump_label.c
@@ -0,0 +1,50 @@
1/*
2 * jump label x86 support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/cpu.h>
14#include <asm/kprobes.h>
15#include <asm/alternative.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19union jump_code_union {
20 char code[JUMP_LABEL_NOP_SIZE];
21 struct {
22 char jump;
23 int offset;
24 } __attribute__((packed));
25};
26
27void arch_jump_label_transform(struct jump_entry *entry,
28 enum jump_label_type type)
29{
30 union jump_code_union code;
31
32 if (type == JUMP_LABEL_ENABLE) {
33 code.jump = 0xe9;
34 code.offset = entry->target -
35 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else
37 memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
38 get_online_cpus();
39 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
41 mutex_unlock(&text_mutex);
42 put_online_cpus();
43}
44
45void arch_jump_label_text_poke_early(jump_label_t addr)
46{
47 text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
48}
49
50#endif
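arch_jump_label_transform writes either ideal_nop5 or a 5-byte relative jmp: opcode 0xe9 followed by a 32-bit displacement measured from the end of the instruction, exactly as encoded by jump_code_union above. A stand-alone, user-space sketch of just that encoding arithmetic:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define JUMP_LABEL_NOP_SIZE 5

    /* build "jmp rel32": the displacement is relative to the first byte
     * after the 5-byte instruction */
    static void encode_jmp(uint8_t buf[JUMP_LABEL_NOP_SIZE],
                           uint64_t code, uint64_t target)
    {
        int32_t offset = (int32_t)(target - (code + JUMP_LABEL_NOP_SIZE));

        buf[0] = 0xe9;
        memcpy(&buf[1], &offset, sizeof(offset));   /* x86 is little-endian */
    }

    int main(void)
    {
        uint8_t buf[JUMP_LABEL_NOP_SIZE];
        int i;

        encode_jmp(buf, 0x1000, 0x1040);   /* expect: e9 3b 00 00 00 */
        for (i = 0; i < JUMP_LABEL_NOP_SIZE; i++)
            printf("%02x ", buf[i]);
        printf("\n");
        return 0;
    }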
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e9..1cbd54c0df99 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 return 0; 230 return 0;
231} 231}
232 232
233/* Dummy buffers for kallsyms_lookup */
234static char __dummy_buf[KSYM_NAME_LEN];
235
236/* Check if paddr is at an instruction boundary */ 233/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 234static int __kprobes can_probe(unsigned long paddr)
238{ 235{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
241 struct insn insn; 238 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 240
244 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) 241 if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
245 return 0; 242 return 0;
246 243
247 /* Decode instructions */ 244 /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1129 *(unsigned long *)addr = val; 1126 *(unsigned long *)addr = val;
1130} 1127}
1131 1128
1132void __kprobes kprobes_optinsn_template_holder(void) 1129static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{ 1130{
1134 asm volatile ( 1131 asm volatile (
1135 ".global optprobe_template_entry\n" 1132 ".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1221 } 1218 }
1222 /* Check whether the address range is reserved */ 1219 /* Check whether the address range is reserved */
1223 if (ftrace_text_reserved(src, src + len - 1) || 1220 if (ftrace_text_reserved(src, src + len - 1) ||
1224 alternatives_text_reserved(src, src + len - 1)) 1221 alternatives_text_reserved(src, src + len - 1) ||
1222 jump_label_text_reserved(src, src + len - 1))
1225 return -EBUSY; 1223 return -EBUSY;
1226 1224
1227 return len; 1225 return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
1269 unsigned long addr, size = 0, offset = 0; 1267 unsigned long addr, size = 0, offset = 0;
1270 struct insn insn; 1268 struct insn insn;
1271 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1269 kprobe_opcode_t buf[MAX_INSN_SIZE];
1272 /* Dummy buffers for lookup_symbol_attrs */
1273 static char __dummy_buf[KSYM_NAME_LEN];
1274 1270
1275 /* Lookup symbol including addr */ 1271 /* Lookup symbol including addr */
1276 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) 1272 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1273 return 0;
1278 1274
1279 /* Check there is enough space for a relative jump. */ 1275 /* Check there is enough space for a relative jump. */
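Both kprobes lookup sites switch to kallsyms_lookup_size_offset(), which needs no name buffer and so lets the static __dummy_buf arrays go away. The shape of the check that can_probe() and can_optimize() now start with, as an isolated fragment (addr_in_known_symbol is illustrative):

    #include <linux/kallsyms.h>

    static int addr_in_known_symbol(unsigned long paddr)
    {
        unsigned long size = 0, offset = 0;

        /* only the size and offset matter; no symbol name is copied */
        if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
            return 0;        /* not inside any known symbol */

        /* callers then decode instructions from paddr - offset onwards */
        return 1;
    }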
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 035c8c529181..b3ea9db39db6 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -36,7 +36,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
36 if (!page) 36 if (!page)
37 goto out; 37 goto out;
38 pud = (pud_t *)page_address(page); 38 pud = (pud_t *)page_address(page);
39 memset(pud, 0, PAGE_SIZE); 39 clear_page(pud);
40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); 40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
41 } 41 }
42 pud = pud_offset(pgd, addr); 42 pud = pud_offset(pgd, addr);
@@ -45,7 +45,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
45 if (!page) 45 if (!page)
46 goto out; 46 goto out;
47 pmd = (pmd_t *)page_address(page); 47 pmd = (pmd_t *)page_address(page);
48 memset(pmd, 0, PAGE_SIZE); 48 clear_page(pmd);
49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); 49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
50 } 50 }
51 pmd = pmd_offset(pud, addr); 51 pmd = pmd_offset(pud, addr);
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 1c355c550960..8f2956091735 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
239 apply_paravirt(pseg, pseg + para->sh_size); 239 apply_paravirt(pseg, pseg + para->sh_size);
240 } 240 }
241 241
242 /* make jump label nops */
243 jump_label_apply_nops(me);
244
242 return 0; 245 return 0;
243} 246}
244 247
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c
new file mode 100644
index 000000000000..f5442c03abc3
--- /dev/null
+++ b/arch/x86/kernel/olpc-xo1.c
@@ -0,0 +1,140 @@
1/*
2 * Support for features of the OLPC XO-1 laptop
3 *
4 * Copyright (C) 2010 One Laptop per Child
5 * Copyright (C) 2006 Red Hat, Inc.
6 * Copyright (C) 2006 Advanced Micro Devices, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/platform_device.h>
18#include <linux/pm.h>
19
20#include <asm/io.h>
21#include <asm/olpc.h>
22
23#define DRV_NAME "olpc-xo1"
24
25#define PMS_BAR 4
26#define ACPI_BAR 5
27
28/* PMC registers (PMS block) */
29#define PM_SCLK 0x10
30#define PM_IN_SLPCTL 0x20
31#define PM_WKXD 0x34
32#define PM_WKD 0x30
33#define PM_SSC 0x54
34
35/* PM registers (ACPI block) */
36#define PM1_CNT 0x08
37#define PM_GPE0_STS 0x18
38
39static unsigned long acpi_base;
40static unsigned long pms_base;
41
42static void xo1_power_off(void)
43{
44 printk(KERN_INFO "OLPC XO-1 power off sequence...\n");
45
46 /* Enable all of these controls with 0 delay */
47 outl(0x40000000, pms_base + PM_SCLK);
48 outl(0x40000000, pms_base + PM_IN_SLPCTL);
49 outl(0x40000000, pms_base + PM_WKXD);
50 outl(0x40000000, pms_base + PM_WKD);
51
52 /* Clear status bits (possibly unnecessary) */
53 outl(0x0002ffff, pms_base + PM_SSC);
54 outl(0xffffffff, acpi_base + PM_GPE0_STS);
55
56 /* Write SLP_EN bit to start the machinery */
57 outl(0x00002000, acpi_base + PM1_CNT);
58}
59
60/* Read the base addresses from the PCI BAR info */
61static int __devinit setup_bases(struct pci_dev *pdev)
62{
63 int r;
64
65 r = pci_enable_device_io(pdev);
66 if (r) {
67 dev_err(&pdev->dev, "can't enable device IO\n");
68 return r;
69 }
70
71 r = pci_request_region(pdev, ACPI_BAR, DRV_NAME);
72 if (r) {
73 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR);
74 return r;
75 }
76
77 r = pci_request_region(pdev, PMS_BAR, DRV_NAME);
78 if (r) {
79 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR);
80 pci_release_region(pdev, ACPI_BAR);
81 return r;
82 }
83
84 acpi_base = pci_resource_start(pdev, ACPI_BAR);
85 pms_base = pci_resource_start(pdev, PMS_BAR);
86
87 return 0;
88}
89
90static int __devinit olpc_xo1_probe(struct platform_device *pdev)
91{
92 struct pci_dev *pcidev;
93 int r;
94
95 pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA,
96 NULL);
 97 if (!pcidev)
98 return -ENODEV;
99
100 r = setup_bases(pcidev);
101 if (r)
102 return r;
103
104 pm_power_off = xo1_power_off;
105
106 printk(KERN_INFO "OLPC XO-1 support registered\n");
107 return 0;
108}
109
110static int __devexit olpc_xo1_remove(struct platform_device *pdev)
111{
112 pm_power_off = NULL;
113 return 0;
114}
115
116static struct platform_driver olpc_xo1_driver = {
117 .driver = {
118 .name = DRV_NAME,
119 .owner = THIS_MODULE,
120 },
121 .probe = olpc_xo1_probe,
122 .remove = __devexit_p(olpc_xo1_remove),
123};
124
125static int __init olpc_xo1_init(void)
126{
127 return platform_driver_register(&olpc_xo1_driver);
128}
129
130static void __exit olpc_xo1_exit(void)
131{
132 platform_driver_unregister(&olpc_xo1_driver);
133}
134
135MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");
136MODULE_LICENSE("GPL");
137MODULE_ALIAS("platform:olpc-xo1");
138
139module_init(olpc_xo1_init);
140module_exit(olpc_xo1_exit);
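The driver above only binds to a platform device named "olpc-xo1"; nothing in this file creates that device. Registration happens from olpc.c further down in this patch, along these lines (register_xo1 is an illustrative wrapper):

    #include <linux/platform_device.h>
    #include <linux/err.h>

    static int __init register_xo1(void)
    {
        struct platform_device *pdev;

        /* name must match olpc_xo1_driver.driver.name ("olpc-xo1") */
        pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
        if (IS_ERR(pdev))
            return PTR_ERR(pdev);
        return 0;
    }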
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 0e0cdde519be..edaf3fe8dc5e 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -17,6 +17,7 @@
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/platform_device.h>
20 21
21#include <asm/geode.h> 22#include <asm/geode.h>
22#include <asm/setup.h> 23#include <asm/setup.h>
@@ -114,6 +115,7 @@ int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
114 unsigned long flags; 115 unsigned long flags;
115 int ret = -EIO; 116 int ret = -EIO;
116 int i; 117 int i;
118 int restarts = 0;
117 119
118 spin_lock_irqsave(&ec_lock, flags); 120 spin_lock_irqsave(&ec_lock, flags);
119 121
@@ -169,7 +171,9 @@ restart:
169 if (wait_on_obf(0x6c, 1)) { 171 if (wait_on_obf(0x6c, 1)) {
170 printk(KERN_ERR "olpc-ec: timeout waiting for" 172 printk(KERN_ERR "olpc-ec: timeout waiting for"
171 " EC to provide data!\n"); 173 " EC to provide data!\n");
172 goto restart; 174 if (restarts++ < 10)
175 goto restart;
176 goto err;
173 } 177 }
174 outbuf[i] = inb(0x68); 178 outbuf[i] = inb(0x68);
175 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); 179 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]);
@@ -183,8 +187,21 @@ err:
183} 187}
184EXPORT_SYMBOL_GPL(olpc_ec_cmd); 188EXPORT_SYMBOL_GPL(olpc_ec_cmd);
185 189
186#ifdef CONFIG_OLPC_OPENFIRMWARE 190static bool __init check_ofw_architecture(void)
187static void __init platform_detect(void) 191{
192 size_t propsize;
193 char olpc_arch[5];
194 const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
195 void *res[] = { &propsize };
196
197 if (olpc_ofw("getprop", args, res)) {
198 printk(KERN_ERR "ofw: getprop call failed!\n");
199 return false;
200 }
201 return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
202}
203
204static u32 __init get_board_revision(void)
188{ 205{
189 size_t propsize; 206 size_t propsize;
190 __be32 rev; 207 __be32 rev;
@@ -193,45 +210,43 @@ static void __init platform_detect(void)
193 210
194 if (olpc_ofw("getprop", args, res) || propsize != 4) { 211 if (olpc_ofw("getprop", args, res) || propsize != 4) {
195 printk(KERN_ERR "ofw: getprop call failed!\n"); 212 printk(KERN_ERR "ofw: getprop call failed!\n");
196 rev = cpu_to_be32(0); 213 return cpu_to_be32(0);
197 } 214 }
198 olpc_platform_info.boardrev = be32_to_cpu(rev); 215 return be32_to_cpu(rev);
199} 216}
200#else 217
201static void __init platform_detect(void) 218static bool __init platform_detect(void)
202{ 219{
203 /* stopgap until OFW support is added to the kernel */ 220 if (!check_ofw_architecture())
204 olpc_platform_info.boardrev = olpc_board(0xc2); 221 return false;
222 olpc_platform_info.flags |= OLPC_F_PRESENT;
223 olpc_platform_info.boardrev = get_board_revision();
224 return true;
205} 225}
206#endif
207 226
208static int __init olpc_init(void) 227static int __init add_xo1_platform_devices(void)
209{ 228{
210 unsigned char *romsig; 229 struct platform_device *pdev;
211 230
212 /* The ioremap check is dangerous; limit what we run it on */ 231 pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0);
213 if (!is_geode() || cs5535_has_vsa2()) 232 if (IS_ERR(pdev))
214 return 0; 233 return PTR_ERR(pdev);
215 234
216 spin_lock_init(&ec_lock); 235 pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
236 if (IS_ERR(pdev))
237 return PTR_ERR(pdev);
217 238
218 romsig = ioremap(0xffffffc0, 16); 239 return 0;
219 if (!romsig) 240}
220 return 0;
221 241
222 if (strncmp(romsig, "CL1 Q", 7)) 242static int __init olpc_init(void)
223 goto unmap; 243{
224 if (strncmp(romsig+6, romsig+13, 3)) { 244 int r = 0;
225 printk(KERN_INFO "OLPC BIOS signature looks invalid. "
226 "Assuming not OLPC\n");
227 goto unmap;
228 }
229 245
230 printk(KERN_INFO "OLPC board with OpenFirmware %.16s\n", romsig); 246 if (!olpc_ofw_present() || !platform_detect())
231 olpc_platform_info.flags |= OLPC_F_PRESENT; 247 return 0;
232 248
233 /* get the platform revision */ 249 spin_lock_init(&ec_lock);
234 platform_detect();
235 250
236 /* assume B1 and above models always have a DCON */ 251 /* assume B1 and above models always have a DCON */
237 if (olpc_board_at_least(olpc_board(0xb1))) 252 if (olpc_board_at_least(olpc_board(0xb1)))
@@ -242,8 +257,10 @@ static int __init olpc_init(void)
242 (unsigned char *) &olpc_platform_info.ecver, 1); 257 (unsigned char *) &olpc_platform_info.ecver, 1);
243 258
244#ifdef CONFIG_PCI_OLPC 259#ifdef CONFIG_PCI_OLPC
245 /* If the VSA exists let it emulate PCI, if not emulate in kernel */ 260 /* If the VSA exists let it emulate PCI, if not emulate in kernel.
246 if (!cs5535_has_vsa2()) 261 * XO-1 only. */
262 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) &&
263 !cs5535_has_vsa2())
247 x86_init.pci.arch_init = pci_olpc_init; 264 x86_init.pci.arch_init = pci_olpc_init;
248#endif 265#endif
249 266
@@ -252,8 +269,12 @@ static int __init olpc_init(void)
252 olpc_platform_info.boardrev >> 4, 269 olpc_platform_info.boardrev >> 4,
253 olpc_platform_info.ecver); 270 olpc_platform_info.ecver);
254 271
255unmap: 272 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
256 iounmap(romsig); 273 r = add_xo1_platform_devices();
274 if (r)
275 return r;
276 }
277
257 return 0; 278 return 0;
258} 279}
259 280
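The EC read path above now gives up after ten restarts instead of looping forever on a wedged embedded controller. The retry cap in isolation, as a user-space sketch (ec_has_data stands in for the wait_on_obf() poll, and the constant 10 matches the hunk):

    #include <stdio.h>

    /* stand-in for the hardware poll: pretend the EC never produces data */
    static int ec_has_data(void) { return 0; }

    /* bounded retry: restart at most 10 times, then fail instead of spinning */
    static int read_ec_byte(unsigned char *out)
    {
        int restarts = 0;

        while (!ec_has_data()) {
            if (restarts++ >= 10)
                return -1;      /* corresponds to the 'goto err' path */
        }
        *out = 0;               /* would be inb(0x68) in the real code */
        return 0;
    }

    int main(void)
    {
        unsigned char b;
        printf("%d\n", read_ec_byte(&b));   /* prints -1: timed out */
        return 0;
    }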
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c
index 3218aa71ab5e..787320464379 100644
--- a/arch/x86/kernel/olpc_ofw.c
+++ b/arch/x86/kernel/olpc_ofw.c
@@ -74,6 +74,12 @@ int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res,
74} 74}
75EXPORT_SYMBOL_GPL(__olpc_ofw); 75EXPORT_SYMBOL_GPL(__olpc_ofw);
76 76
77bool olpc_ofw_present(void)
78{
79 return olpc_ofw_cif != NULL;
80}
81EXPORT_SYMBOL_GPL(olpc_ofw_present);
82
77/* OFW cif _should_ be above this address */ 83/* OFW cif _should_ be above this address */
78#define OFW_MIN 0xff000000 84#define OFW_MIN 0xff000000
79 85
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1db183ed7c01..c5b250011fd4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -413,7 +413,6 @@ struct pv_mmu_ops pv_mmu_ops = {
413 413
414 .alloc_pte = paravirt_nop, 414 .alloc_pte = paravirt_nop,
415 .alloc_pmd = paravirt_nop, 415 .alloc_pmd = paravirt_nop,
416 .alloc_pmd_clone = paravirt_nop,
417 .alloc_pud = paravirt_nop, 416 .alloc_pud = paravirt_nop,
418 .release_pte = paravirt_nop, 417 .release_pte = paravirt_nop,
419 .release_pmd = paravirt_nop, 418 .release_pmd = paravirt_nop,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 078d4ec1a9d9..f56a117cef68 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -47,6 +47,7 @@
47#include <asm/rio.h> 47#include <asm/rio.h>
48#include <asm/bios_ebda.h> 48#include <asm/bios_ebda.h>
49#include <asm/x86_init.h> 49#include <asm/x86_init.h>
50#include <asm/iommu_table.h>
50 51
51#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT 52#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
52int use_calgary __read_mostly = 1; 53int use_calgary __read_mostly = 1;
@@ -1364,7 +1365,7 @@ static int __init calgary_iommu_init(void)
1364 return 0; 1365 return 0;
1365} 1366}
1366 1367
1367void __init detect_calgary(void) 1368int __init detect_calgary(void)
1368{ 1369{
1369 int bus; 1370 int bus;
1370 void *tbl; 1371 void *tbl;
@@ -1378,13 +1379,13 @@ void __init detect_calgary(void)
1378 * another HW IOMMU already, bail out. 1379 * another HW IOMMU already, bail out.
1379 */ 1380 */
1380 if (no_iommu || iommu_detected) 1381 if (no_iommu || iommu_detected)
1381 return; 1382 return -ENODEV;
1382 1383
1383 if (!use_calgary) 1384 if (!use_calgary)
1384 return; 1385 return -ENODEV;
1385 1386
1386 if (!early_pci_allowed()) 1387 if (!early_pci_allowed())
1387 return; 1388 return -ENODEV;
1388 1389
1389 printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); 1390 printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n");
1390 1391
@@ -1410,13 +1411,13 @@ void __init detect_calgary(void)
1410 if (!rio_table_hdr) { 1411 if (!rio_table_hdr) {
1411 printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " 1412 printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table "
1412 "in EBDA - bailing!\n"); 1413 "in EBDA - bailing!\n");
1413 return; 1414 return -ENODEV;
1414 } 1415 }
1415 1416
1416 ret = build_detail_arrays(); 1417 ret = build_detail_arrays();
1417 if (ret) { 1418 if (ret) {
1418 printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); 1419 printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret);
1419 return; 1420 return -ENOMEM;
1420 } 1421 }
1421 1422
1422 specified_table_size = determine_tce_table_size((is_kdump_kernel() ? 1423 specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
@@ -1464,7 +1465,7 @@ void __init detect_calgary(void)
1464 1465
1465 x86_init.iommu.iommu_init = calgary_iommu_init; 1466 x86_init.iommu.iommu_init = calgary_iommu_init;
1466 } 1467 }
1467 return; 1468 return calgary_found;
1468 1469
1469cleanup: 1470cleanup:
1470 for (--bus; bus >= 0; --bus) { 1471 for (--bus; bus >= 0; --bus) {
@@ -1473,6 +1474,7 @@ cleanup:
1473 if (info->tce_space) 1474 if (info->tce_space)
1474 free_tce_table(info->tce_space); 1475 free_tce_table(info->tce_space);
1475 } 1476 }
1477 return -ENOMEM;
1476} 1478}
1477 1479
1478static int __init calgary_parse_options(char *p) 1480static int __init calgary_parse_options(char *p)
@@ -1594,3 +1596,5 @@ static int __init calgary_fixup_tce_spaces(void)
1594 * and before device_initcall. 1596 * and before device_initcall.
1595 */ 1597 */
1596rootfs_initcall(calgary_fixup_tce_spaces); 1598rootfs_initcall(calgary_fixup_tce_spaces);
1599
1600IOMMU_INIT_POST(detect_calgary);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 9f07cfcbd3a5..9ea999a4dcc1 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,9 +11,8 @@
11#include <asm/iommu.h> 11#include <asm/iommu.h>
12#include <asm/gart.h> 12#include <asm/gart.h>
13#include <asm/calgary.h> 13#include <asm/calgary.h>
14#include <asm/amd_iommu.h>
15#include <asm/x86_init.h> 14#include <asm/x86_init.h>
16#include <asm/xen/swiotlb-xen.h> 15#include <asm/iommu_table.h>
17 16
18static int forbid_dac __read_mostly; 17static int forbid_dac __read_mostly;
19 18
@@ -45,6 +44,8 @@ int iommu_detected __read_mostly = 0;
45 */ 44 */
46int iommu_pass_through __read_mostly; 45int iommu_pass_through __read_mostly;
47 46
47extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
48
48/* Dummy device used for NULL arguments (normally ISA). */ 49/* Dummy device used for NULL arguments (normally ISA). */
49struct device x86_dma_fallback_dev = { 50struct device x86_dma_fallback_dev = {
50 .init_name = "fallback device", 51 .init_name = "fallback device",
@@ -130,26 +131,24 @@ static void __init dma32_free_bootmem(void)
130 131
131void __init pci_iommu_alloc(void) 132void __init pci_iommu_alloc(void)
132{ 133{
134 struct iommu_table_entry *p;
135
133 /* free the range so iommu could get some range less than 4G */ 136 /* free the range so iommu could get some range less than 4G */
134 dma32_free_bootmem(); 137 dma32_free_bootmem();
135 138
136 if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) 139 sort_iommu_table(__iommu_table, __iommu_table_end);
137 goto out; 140 check_iommu_entries(__iommu_table, __iommu_table_end);
138
139 gart_iommu_hole_init();
140
141 detect_calgary();
142
143 detect_intel_iommu();
144 141
145 /* needs to be called after gart_iommu_hole_init */ 142 for (p = __iommu_table; p < __iommu_table_end; p++) {
146 amd_iommu_detect(); 143 if (p && p->detect && p->detect() > 0) {
147out: 144 p->flags |= IOMMU_DETECTED;
148 pci_xen_swiotlb_init(); 145 if (p->early_init)
149 146 p->early_init();
150 pci_swiotlb_init(); 147 if (p->flags & IOMMU_FINISH_IF_DETECTED)
148 break;
149 }
150 }
151} 151}
152
153void *dma_generic_alloc_coherent(struct device *dev, size_t size, 152void *dma_generic_alloc_coherent(struct device *dev, size_t size,
154 dma_addr_t *dma_addr, gfp_t flag) 153 dma_addr_t *dma_addr, gfp_t flag)
155{ 154{
@@ -292,6 +291,7 @@ EXPORT_SYMBOL(dma_supported);
292 291
293static int __init pci_iommu_init(void) 292static int __init pci_iommu_init(void)
294{ 293{
294 struct iommu_table_entry *p;
295 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); 295 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
296 296
297#ifdef CONFIG_PCI 297#ifdef CONFIG_PCI
@@ -299,12 +299,10 @@ static int __init pci_iommu_init(void)
299#endif 299#endif
300 x86_init.iommu.iommu_init(); 300 x86_init.iommu.iommu_init();
301 301
302 if (swiotlb || xen_swiotlb) { 302 for (p = __iommu_table; p < __iommu_table_end; p++) {
303 printk(KERN_INFO "PCI-DMA: " 303 if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
304 "Using software bounce buffering for IO (SWIOTLB)\n"); 304 p->late_init();
305 swiotlb_print_info(); 305 }
306 } else
307 swiotlb_free();
308 306
309 return 0; 307 return 0;
310} 308}
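pci_iommu_alloc() no longer calls each detection routine by name; it sorts and walks a linker-generated __iommu_table, and pci_iommu_init() later invokes late_init() for every entry whose detect() succeeded. Judging only from the fields dereferenced in these two loops, one entry looks roughly like the sketch below; the authoritative definition is in the new <asm/iommu_table.h>, which this hunk does not show, so treat the layout as an approximation:

/*
 * Approximate shape of an IOMMU table entry, inferred from the loops in
 * pci_iommu_alloc() and pci_iommu_init(); member order and any extra
 * fields may differ in the real header.
 */
struct iommu_table_entry {
	initcall_t	detect;		/* returns > 0 when this IOMMU is present   */
	initcall_t	depend;		/* detect routine that must run before us   */
	void		(*early_init)(void);	/* called right after detection     */
	void		(*late_init)(void);	/* called from pci_iommu_init()     */
	int		flags;		/* IOMMU_DETECTED, IOMMU_FINISH_IF_DETECTED */
};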
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 0f7f130caa67..ba0f0ca9f280 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -39,8 +39,9 @@
39#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
40#include <asm/swiotlb.h> 40#include <asm/swiotlb.h>
41#include <asm/dma.h> 41#include <asm/dma.h>
42#include <asm/k8.h> 42#include <asm/amd_nb.h>
43#include <asm/x86_init.h> 43#include <asm/x86_init.h>
44#include <asm/iommu_table.h>
44 45
45static unsigned long iommu_bus_base; /* GART remapping area (physical) */ 46static unsigned long iommu_bus_base; /* GART remapping area (physical) */
46static unsigned long iommu_size; /* size of remapping area bytes */ 47static unsigned long iommu_size; /* size of remapping area bytes */
@@ -560,8 +561,11 @@ static void enable_gart_translations(void)
560{ 561{
561 int i; 562 int i;
562 563
563 for (i = 0; i < num_k8_northbridges; i++) { 564 if (!k8_northbridges.gart_supported)
564 struct pci_dev *dev = k8_northbridges[i]; 565 return;
566
567 for (i = 0; i < k8_northbridges.num; i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i];
565 569
566 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
567 } 571 }
@@ -592,16 +596,19 @@ static void gart_fixup_northbridges(struct sys_device *dev)
592 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
593 return; 597 return;
594 598
599 if (!k8_northbridges.gart_supported)
600 return;
601
595 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
596 603
597 for (i = 0; i < num_k8_northbridges; i++) { 604 for (i = 0; i < k8_northbridges.num; i++) {
598 struct pci_dev *dev = k8_northbridges[i]; 605 struct pci_dev *dev = k8_northbridges.nb_misc[i];
599 606
600 /* 607 /*
601 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
602 * step. Restore the pre-suspend aperture settings. 609 * step. Restore the pre-suspend aperture settings.
603 */ 610 */
604 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); 611 gart_set_size_and_enable(dev, aperture_order);
605 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); 612 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
606 } 613 }
607} 614}
@@ -649,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
649 656
650 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
651 dev = NULL; 658 dev = NULL;
652 for (i = 0; i < num_k8_northbridges; i++) { 659 for (i = 0; i < k8_northbridges.num; i++) {
653 dev = k8_northbridges[i]; 660 dev = k8_northbridges.nb_misc[i];
654 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
655 if (!new_aper_base) 662 if (!new_aper_base)
656 goto nommu; 663 goto nommu;
@@ -718,10 +725,13 @@ static void gart_iommu_shutdown(void)
718 if (!no_agp) 725 if (!no_agp)
719 return; 726 return;
720 727
721 for (i = 0; i < num_k8_northbridges; i++) { 728 if (!k8_northbridges.gart_supported)
729 return;
730
731 for (i = 0; i < k8_northbridges.num; i++) {
722 u32 ctl; 732 u32 ctl;
723 733
724 dev = k8_northbridges[i]; 734 dev = k8_northbridges.nb_misc[i];
725 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
726 736
727 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -739,7 +749,7 @@ int __init gart_iommu_init(void)
739 unsigned long scratch; 749 unsigned long scratch;
740 long i; 750 long i;
741 751
742 if (num_k8_northbridges == 0) 752 if (!k8_northbridges.gart_supported)
743 return 0; 753 return 0;
744 754
745#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
@@ -896,3 +906,4 @@ void __init gart_parse_options(char *p)
896 } 906 }
897 } 907 }
898} 908}
909IOMMU_INIT_POST(gart_iommu_hole_init);
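Alongside the table conversion, this file picks up the k8_northbridges rework: the bare array plus num_k8_northbridges counter becomes a structure that also records whether the bridges carry a GART at all, so every loop now checks gart_supported first. The recurring idiom, reduced to a sketch (only the members actually used in this file are shown; the full definition lives in <asm/amd_nb.h>):

/* Illustration of the new northbridge walk used throughout pci-gart_64.c. */
if (!k8_northbridges.gart_supported)
	return;					/* nothing to program on these bridges */

for (i = 0; i < k8_northbridges.num; i++) {
	struct pci_dev *dev = k8_northbridges.nb_misc[i];

	/* per-bridge GART programming goes here */
}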
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
new file mode 100644
index 000000000000..55d745ec1181
--- /dev/null
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -0,0 +1,89 @@
1#include <linux/dma-mapping.h>
2#include <asm/iommu_table.h>
3#include <linux/string.h>
4#include <linux/kallsyms.h>
5
6
7#define DEBUG 1
8
9static struct iommu_table_entry * __init
10find_dependents_of(struct iommu_table_entry *start,
11 struct iommu_table_entry *finish,
12 struct iommu_table_entry *q)
13{
14 struct iommu_table_entry *p;
15
16 if (!q)
17 return NULL;
18
19 for (p = start; p < finish; p++)
20 if (p->detect == q->depend)
21 return p;
22
23 return NULL;
24}
25
26
27void __init sort_iommu_table(struct iommu_table_entry *start,
28 struct iommu_table_entry *finish) {
29
30 struct iommu_table_entry *p, *q, tmp;
31
32 for (p = start; p < finish; p++) {
33again:
34 q = find_dependents_of(start, finish, p);
35 /* We are a bit sneaky here. We use the memory address to figure 36 * out if the node we depend on is past our point, if so, swap.
36 * out if the node we depend on is past our point, if so, swap.
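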
37 */
38 if (q > p) {
39 tmp = *p;
40 memmove(p, q, sizeof(*p));
41 *q = tmp;
42 goto again;
43 }
44 }
45
46}
47
48#ifdef DEBUG
49void __init check_iommu_entries(struct iommu_table_entry *start,
50 struct iommu_table_entry *finish)
51{
52 struct iommu_table_entry *p, *q, *x;
53 char sym_p[KSYM_SYMBOL_LEN];
54 char sym_q[KSYM_SYMBOL_LEN];
55
56 /* Simple cyclic dependency checker. */
57 for (p = start; p < finish; p++) {
58 q = find_dependents_of(start, finish, p);
59 x = find_dependents_of(start, finish, q);
60 if (p == x) {
61 sprint_symbol(sym_p, (unsigned long)p->detect);
62 sprint_symbol(sym_q, (unsigned long)q->detect);
63
64 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
65 " on %s and vice-versa. BREAKING IT.\n",
66 sym_p, sym_q);
67 /* Heavy handed way..*/
68 x->depend = 0;
69 }
70 }
71
72 for (p = start; p < finish; p++) {
73 q = find_dependents_of(p, finish, p);
74 if (q && q > p) {
75 sprint_symbol(sym_p, (unsigned long)p->detect);
76 sprint_symbol(sym_q, (unsigned long)q->detect);
77
78 printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
79 "should be called before %s!\n",
80 sym_p, sym_q);
81 }
82 }
83}
84#else
85inline void check_iommu_entries(struct iommu_table_entry *start,
86 struct iommu_table_entry *finish)
87{
88}
89#endif
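sort_iommu_table() is a small dependency sort: whenever an entry's depend pointer names a detect routine that sits later in the table, the two entries are swapped and the scan restarts at the same slot, so dependencies always end up in front of their dependents. The same idea in a self-contained userspace program, with a deliberately simplified entry type (this illustrates the algorithm only, not the kernel structure):

#include <stdio.h>
#include <string.h>

typedef int (*detect_fn)(void);
static int detect_a(void) { return 1; }
static int detect_b(void) { return 1; }

struct entry { detect_fn detect; detect_fn depend; const char *name; };

static struct entry *find_dependency(struct entry *start, struct entry *end,
				     struct entry *q)
{
	for (struct entry *p = start; p < end; p++)
		if (p->detect == q->depend)
			return p;
	return NULL;
}

int main(void)
{
	/* "A" depends on "B", but "B" is listed after "A". */
	struct entry table[] = {
		{ detect_a, detect_b, "A" },
		{ detect_b, NULL,     "B" },
	};
	struct entry *start = table, *end = table + 2;

	for (struct entry *p = start; p < end; p++) {
		struct entry *q;
again:
		q = find_dependency(start, end, p);
		if (q && q > p) {	/* our dependency sits later in the table: swap */
			struct entry tmp = *p;
			memmove(p, q, sizeof(*p));
			*q = tmp;
			goto again;
		}
	}

	for (struct entry *p = start; p < end; p++)
		printf("%s\n", p->name);	/* prints "B" then "A" */
	return 0;
}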
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index a5bc528d4328..8f972cbddef0 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -10,7 +10,8 @@
10#include <asm/iommu.h> 10#include <asm/iommu.h>
11#include <asm/swiotlb.h> 11#include <asm/swiotlb.h>
12#include <asm/dma.h> 12#include <asm/dma.h>
13 13#include <asm/xen/swiotlb-xen.h>
14#include <asm/iommu_table.h>
14int swiotlb __read_mostly; 15int swiotlb __read_mostly;
15 16
16static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 17static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -41,25 +42,42 @@ static struct dma_map_ops swiotlb_dma_ops = {
41}; 42};
42 43
43/* 44/*
44 * pci_swiotlb_detect - set swiotlb to 1 if necessary 45 * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
45 * 46 *
46 * This returns non-zero if we are forced to use swiotlb (by the boot 47 * This returns non-zero if we are forced to use swiotlb (by the boot
47 * option). 48 * option).
48 */ 49 */
49int __init pci_swiotlb_detect(void) 50int __init pci_swiotlb_detect_override(void)
50{ 51{
51 int use_swiotlb = swiotlb | swiotlb_force; 52 int use_swiotlb = swiotlb | swiotlb_force;
52 53
54 if (swiotlb_force)
55 swiotlb = 1;
56
57 return use_swiotlb;
58}
59IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
60 pci_xen_swiotlb_detect,
61 pci_swiotlb_init,
62 pci_swiotlb_late_init);
63
64/*
65 * if 4GB or more detected (and iommu=off not set) return 1
66 * and set swiotlb to 1.
67 */
68int __init pci_swiotlb_detect_4gb(void)
69{
53 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 70 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
54#ifdef CONFIG_X86_64 71#ifdef CONFIG_X86_64
55 if (!no_iommu && max_pfn > MAX_DMA32_PFN) 72 if (!no_iommu && max_pfn > MAX_DMA32_PFN)
56 swiotlb = 1; 73 swiotlb = 1;
57#endif 74#endif
58 if (swiotlb_force) 75 return swiotlb;
59 swiotlb = 1;
60
61 return use_swiotlb;
62} 76}
77IOMMU_INIT(pci_swiotlb_detect_4gb,
78 pci_swiotlb_detect_override,
79 pci_swiotlb_init,
80 pci_swiotlb_late_init);
63 81
64void __init pci_swiotlb_init(void) 82void __init pci_swiotlb_init(void)
65{ 83{
@@ -68,3 +86,15 @@ void __init pci_swiotlb_init(void)
68 dma_ops = &swiotlb_dma_ops; 86 dma_ops = &swiotlb_dma_ops;
69 } 87 }
70} 88}
89
90void __init pci_swiotlb_late_init(void)
91{
92 /* An IOMMU turned us off. */
93 if (!swiotlb)
94 swiotlb_free();
95 else {
96 printk(KERN_INFO "PCI-DMA: "
97 "Using software bounce buffering for IO (SWIOTLB)\n");
98 swiotlb_print_info();
99 }
100}
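pci_swiotlb_detect() is split so the table sorter can order the checks: the override case (swiotlb=force) must run after the Xen detector it depends on and, if it fires, finish initialization on the spot, while the plain >4 GB heuristic depends on the override check in turn. Read together with the IOMMU_INIT_FINISH()/IOMMU_INIT() arguments above, the intended sequence is roughly the following; the macros themselves are defined in <asm/iommu_table.h>, which is not part of this hunk, so the exact semantics are inferred:

pci_xen_swiotlb_detect()	/* dependency of the override entry                       */
pci_swiotlb_detect_override()	/* swiotlb=force: detect, init, and stop the walk         */
pci_swiotlb_detect_4gb()	/* otherwise: enable swiotlb when RAM exceeds 4 GB        */
pci_swiotlb_init()		/* early_init for whichever entry matched                 */
pci_swiotlb_late_init()		/* late_init: print the banner or free the bounce buffer */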
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c
deleted file mode 100644
index b112406f1996..000000000000
--- a/arch/x86/kernel/pmtimer_64.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/* Ported over from i386 by AK, original copyright was:
2 *
3 * (C) Dominik Brodowski <linux@brodo.de> 2003
4 *
5 * Driver to use the Power Management Timer (PMTMR) available in some
6 * southbridges as primary timing source for the Linux kernel.
7 *
8 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
9 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
10 *
11 * This file is licensed under the GPL v2.
12 *
13 * Dropped all the hardware bug workarounds for now. Hopefully they
14 * are not needed on 64bit chipsets.
15 */
16
17#include <linux/jiffies.h>
18#include <linux/kernel.h>
19#include <linux/time.h>
20#include <linux/init.h>
21#include <linux/cpumask.h>
22#include <linux/acpi_pmtmr.h>
23
24#include <asm/io.h>
25#include <asm/proto.h>
26#include <asm/msr.h>
27#include <asm/vsyscall.h>
28
29static inline u32 cyc2us(u32 cycles)
30{
31 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
32 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
33 *
34 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
35 * easily be multiplied with 286 (=0x11E) without having to fear
36 * u32 overflows.
37 */
38 cycles *= 286;
39 return (cycles >> 10);
40}
41
42static unsigned pmtimer_wait_tick(void)
43{
44 u32 a, b;
45 for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK;
46 a == b;
47 b = inl(pmtmr_ioport) & ACPI_PM_MASK)
48 cpu_relax();
49 return b;
50}
51
52/* note: wait time is rounded up to one tick */
53void pmtimer_wait(unsigned us)
54{
55 u32 a, b;
56 a = pmtimer_wait_tick();
57 do {
58 b = inl(pmtmr_ioport);
59 cpu_relax();
60 } while (cyc2us(b - a) < us);
61}
62
63static int __init nopmtimer_setup(char *s)
64{
65 pmtmr_ioport = 0;
66 return 1;
67}
68
69__setup("nopmtimer", nopmtimer_setup);
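As a quick check of the cyc2us() comment in the deleted file: with HZ = 100 a tick lasts 10 000 µs, and the quoted worst-case delta of 35 796 PM-timer ticks converts to 35 796 × 286 / 1024 ≈ 9 998 µs, just under one tick, while the intermediate product 35 796 × 286 ≈ 10.2 million stays far below the 32-bit limit of about 4.29 billion, so both the 286/1024 approximation and the no-overflow claim hold.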
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3d9ea531ddd1..b3d7a3a04f38 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
424 load_TLS(next, cpu); 424 load_TLS(next, cpu);
425 425
426 /* Must be after DS reload */ 426 /* Must be after DS reload */
427 unlazy_fpu(prev_p); 427 __unlazy_fpu(prev_p);
428 428
429 /* Make sure cpu is ready for new context */ 429 /* Make sure cpu is ready for new context */
430 if (preload_fpu) 430 if (preload_fpu)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342fe83a..7a4cf14223ba 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -84,7 +84,7 @@ static int __init reboot_setup(char *str)
84 } 84 }
85 /* we will leave sorting out the final value 85 /* we will leave sorting out the final value
86 when we are ready to reboot, since we might not 86 when we are ready to reboot, since we might not
87 have set up boot_cpu_id or smp_num_cpu */ 87 have detected BSP APIC ID or smp_num_cpu */
88 break; 88 break;
89#endif /* CONFIG_SMP */ 89#endif /* CONFIG_SMP */
90 90
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c3a4fbb2b996..a59f6a6df5e2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -83,7 +83,6 @@
83#include <asm/dmi.h> 83#include <asm/dmi.h>
84#include <asm/io_apic.h> 84#include <asm/io_apic.h>
85#include <asm/ist.h> 85#include <asm/ist.h>
86#include <asm/vmi.h>
87#include <asm/setup_arch.h> 86#include <asm/setup_arch.h>
88#include <asm/bios_ebda.h> 87#include <asm/bios_ebda.h>
89#include <asm/cacheflush.h> 88#include <asm/cacheflush.h>
@@ -107,11 +106,12 @@
107#include <asm/percpu.h> 106#include <asm/percpu.h>
108#include <asm/topology.h> 107#include <asm/topology.h>
109#include <asm/apicdef.h> 108#include <asm/apicdef.h>
110#include <asm/k8.h> 109#include <asm/amd_nb.h>
111#ifdef CONFIG_X86_64 110#ifdef CONFIG_X86_64
112#include <asm/numa_64.h> 111#include <asm/numa_64.h>
113#endif 112#endif
114#include <asm/mce.h> 113#include <asm/mce.h>
114#include <asm/alternative.h>
115 115
116/* 116/*
117 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 117 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -125,7 +125,6 @@ unsigned long max_pfn_mapped;
125RESERVE_BRK(dmi_alloc, 65536); 125RESERVE_BRK(dmi_alloc, 65536);
126#endif 126#endif
127 127
128unsigned int boot_cpu_id __read_mostly;
129 128
130static __initdata unsigned long _brk_start = (unsigned long)__brk_base; 129static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
131unsigned long _brk_end = (unsigned long)__brk_base; 130unsigned long _brk_end = (unsigned long)__brk_base;
@@ -618,79 +617,7 @@ static __init void reserve_ibft_region(void)
618 reserve_early_overlap_ok(addr, addr + size, "ibft"); 617 reserve_early_overlap_ok(addr, addr + size, "ibft");
619} 618}
620 619
621#ifdef CONFIG_X86_RESERVE_LOW_64K 620static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
622static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
623{
624 printk(KERN_NOTICE
625 "%s detected: BIOS may corrupt low RAM, working around it.\n",
626 d->ident);
627
628 e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
629 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
630
631 return 0;
632}
633#endif
634
635/* List of systems that have known low memory corruption BIOS problems */
636static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
637#ifdef CONFIG_X86_RESERVE_LOW_64K
638 {
639 .callback = dmi_low_memory_corruption,
640 .ident = "AMI BIOS",
641 .matches = {
642 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
643 },
644 },
645 {
646 .callback = dmi_low_memory_corruption,
647 .ident = "Phoenix BIOS",
648 .matches = {
649 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
650 },
651 },
652 {
653 .callback = dmi_low_memory_corruption,
654 .ident = "Phoenix/MSC BIOS",
655 .matches = {
656 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
657 },
658 },
659 /*
660 * AMI BIOS with low memory corruption was found on Intel DG45ID and
661 * DG45FC boards.
662 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
663 * match only DMI_BOARD_NAME and see if there is more bad products
664 * with this vendor.
665 */
666 {
667 .callback = dmi_low_memory_corruption,
668 .ident = "AMI BIOS",
669 .matches = {
670 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
671 },
672 },
673 {
674 .callback = dmi_low_memory_corruption,
675 .ident = "AMI BIOS",
676 .matches = {
677 DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
678 },
679 },
680 /*
681 * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
682 * match on the product name.
683 */
684 {
685 .callback = dmi_low_memory_corruption,
686 .ident = "Phoenix BIOS",
687 .matches = {
688 DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
689 },
690 },
691#endif
692 {}
693};
694 621
695static void __init trim_bios_range(void) 622static void __init trim_bios_range(void)
696{ 623{
@@ -698,8 +625,14 @@ static void __init trim_bios_range(void)
698 * A special case is the first 4Kb of memory; 625 * A special case is the first 4Kb of memory;
699 * This is a BIOS owned area, not kernel ram, but generally 626 * This is a BIOS owned area, not kernel ram, but generally
700 * not listed as such in the E820 table. 627 * not listed as such in the E820 table.
628 *
629 * This typically reserves additional memory (64KiB by default)
630 * since some BIOSes are known to corrupt low memory. See the
631 * Kconfig help text for X86_RESERVE_LOW.
701 */ 632 */
702 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); 633 e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
634 E820_RAM, E820_RESERVED);
635
703 /* 636 /*
704 * special case: Some BIOSen report the PC BIOS 637 * special case: Some BIOSen report the PC BIOS
705 * area (640->1Mb) as ram even though it is not. 638 * area (640->1Mb) as ram even though it is not.
@@ -709,6 +642,28 @@ static void __init trim_bios_range(void)
709 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 642 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
710} 643}
711 644
645static int __init parse_reservelow(char *p)
646{
647 unsigned long long size;
648
649 if (!p)
650 return -EINVAL;
651
652 size = memparse(p, &p);
653
654 if (size < 4096)
655 size = 4096;
656
657 if (size > 640*1024)
658 size = 640*1024;
659
660 reserve_low = size;
661
662 return 0;
663}
664
665early_param("reservelow", parse_reservelow);
666
712/* 667/*
713 * Determine if we were loaded by an EFI loader. If so, then we have also been 668 * Determine if we were loaded by an EFI loader. If so, then we have also been
714 * passed the efi memmap, systab, etc., so we should use these data structures 669 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -726,6 +681,7 @@ void __init setup_arch(char **cmdline_p)
726{ 681{
727 int acpi = 0; 682 int acpi = 0;
728 int k8 = 0; 683 int k8 = 0;
684 unsigned long flags;
729 685
730#ifdef CONFIG_X86_32 686#ifdef CONFIG_X86_32
731 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 687 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -734,10 +690,10 @@ void __init setup_arch(char **cmdline_p)
734 printk(KERN_INFO "Command line: %s\n", boot_command_line); 690 printk(KERN_INFO "Command line: %s\n", boot_command_line);
735#endif 691#endif
736 692
737 /* VMI may relocate the fixmap; do this before touching ioremap area */ 693 /*
738 vmi_init(); 694 * If we have OLPC OFW, we might end up relocating the fixmap due to
739 695 * reserve_top(), so do this before touching the ioremap area.
740 /* OFW also may relocate the fixmap */ 696 */
741 olpc_ofw_detect(); 697 olpc_ofw_detect();
742 698
743 early_trap_init(); 699 early_trap_init();
@@ -838,9 +794,6 @@ void __init setup_arch(char **cmdline_p)
838 794
839 x86_report_nx(); 795 x86_report_nx();
840 796
841 /* Must be before kernel pagetables are setup */
842 vmi_activate();
843
844 /* after early param, so could get panic from serial */ 797 /* after early param, so could get panic from serial */
845 reserve_early_setup_data(); 798 reserve_early_setup_data();
846 799
@@ -863,8 +816,6 @@ void __init setup_arch(char **cmdline_p)
863 816
864 dmi_scan_machine(); 817 dmi_scan_machine();
865 818
866 dmi_check_system(bad_bios_dmi_table);
867
868 /* 819 /*
869 * VMware detection requires dmi to be available, so this 820 * VMware detection requires dmi to be available, so this
870 * needs to be done after dmi_scan_machine, for the BP. 821 * needs to be done after dmi_scan_machine, for the BP.
@@ -1071,6 +1022,10 @@ void __init setup_arch(char **cmdline_p)
1071 x86_init.oem.banner(); 1022 x86_init.oem.banner();
1072 1023
1073 mcheck_init(); 1024 mcheck_init();
1025
1026 local_irq_save(flags);
1027 arch_init_ideal_nop5();
1028 local_irq_restore(flags);
1074} 1029}
1075 1030
1076#ifdef CONFIG_X86_32 1031#ifdef CONFIG_X86_32
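The DMI blacklist that used to force a 64 KiB low-memory reservation on known-bad BIOSes is replaced by an unconditional reservation of CONFIG_X86_RESERVE_LOW (converted to bytes), tunable at boot with the new reservelow= early parameter; memparse() accepts the usual k/M suffixes and parse_reservelow() clamps the result to the 4 KiB – 640 KiB range. Illustrative kernel command lines (the values are examples, not recommendations):

	reservelow=64k		# reserve the first 64 KiB of RAM
	reservelow=1k		# below the minimum, clamped up to 4 KiB
	reservelow=1M		# above the maximum, clamped down to 640 KiB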
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index a60df9ae6454..2335c15c93a4 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -253,7 +253,7 @@ void __init setup_per_cpu_areas(void)
253 * Up to this point, the boot CPU has been using .init.data 253 * Up to this point, the boot CPU has been using .init.data
254 * area. Reload any changed state for the boot CPU. 254 * area. Reload any changed state for the boot CPU.
255 */ 255 */
256 if (cpu == boot_cpu_id) 256 if (!cpu)
257 switch_to_new_gdt(cpu); 257 switch_to_new_gdt(cpu);
258 } 258 }
259 259
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
index cb22acf3ed09..dd4c281ffe57 100644
--- a/arch/x86/kernel/sfi.c
+++ b/arch/x86/kernel/sfi.c
@@ -34,7 +34,7 @@
34#ifdef CONFIG_X86_LOCAL_APIC 34#ifdef CONFIG_X86_LOCAL_APIC
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36 36
37void __init mp_sfi_register_lapic_address(unsigned long address) 37static void __init mp_sfi_register_lapic_address(unsigned long address)
38{ 38{
39 mp_lapic_addr = address; 39 mp_lapic_addr = address;
40 40
@@ -46,7 +46,7 @@ void __init mp_sfi_register_lapic_address(unsigned long address)
46} 46}
47 47
48/* All CPUs enumerated by SFI must be present and enabled */ 48/* All CPUs enumerated by SFI must be present and enabled */
49void __cpuinit mp_sfi_register_lapic(u8 id) 49static void __cpuinit mp_sfi_register_lapic(u8 id)
50{ 50{
51 if (MAX_APICS - id <= 0) { 51 if (MAX_APICS - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n", 52 pr_warning("Processor #%d invalid (max %d)\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8b3bfc4dd708..dfb50890b5b7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,7 +62,7 @@
62#include <asm/pgtable.h> 62#include <asm/pgtable.h>
63#include <asm/tlbflush.h> 63#include <asm/tlbflush.h>
64#include <asm/mtrr.h> 64#include <asm/mtrr.h>
65#include <asm/vmi.h> 65#include <asm/mwait.h>
66#include <asm/apic.h> 66#include <asm/apic.h>
67#include <asm/setup.h> 67#include <asm/setup.h>
68#include <asm/uv/uv.h> 68#include <asm/uv/uv.h>
@@ -311,7 +311,6 @@ notrace static void __cpuinit start_secondary(void *unused)
311 __flush_tlb_all(); 311 __flush_tlb_all();
312#endif 312#endif
313 313
314 vmi_bringup();
315 cpu_init(); 314 cpu_init();
316 preempt_disable(); 315 preempt_disable();
317 smp_callin(); 316 smp_callin();
@@ -324,9 +323,9 @@ notrace static void __cpuinit start_secondary(void *unused)
324 check_tsc_sync_target(); 323 check_tsc_sync_target();
325 324
326 if (nmi_watchdog == NMI_IO_APIC) { 325 if (nmi_watchdog == NMI_IO_APIC) {
327 legacy_pic->chip->mask(0); 326 legacy_pic->mask(0);
328 enable_NMI_through_LVT0(); 327 enable_NMI_through_LVT0();
329 legacy_pic->chip->unmask(0); 328 legacy_pic->unmask(0);
330 } 329 }
331 330
332 /* This must be done before setting cpu_online_mask */ 331 /* This must be done before setting cpu_online_mask */
@@ -397,6 +396,19 @@ void __cpuinit smp_store_cpu_info(int id)
397 identify_secondary_cpu(c); 396 identify_secondary_cpu(c);
398} 397}
399 398
399static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
400{
401 struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
402 struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
403
404 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
405 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
406 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
407 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
408 cpumask_set_cpu(cpu1, c2->llc_shared_map);
409 cpumask_set_cpu(cpu2, c1->llc_shared_map);
410}
411
400 412
401void __cpuinit set_cpu_sibling_map(int cpu) 413void __cpuinit set_cpu_sibling_map(int cpu)
402{ 414{
@@ -409,14 +421,13 @@ void __cpuinit set_cpu_sibling_map(int cpu)
409 for_each_cpu(i, cpu_sibling_setup_mask) { 421 for_each_cpu(i, cpu_sibling_setup_mask) {
410 struct cpuinfo_x86 *o = &cpu_data(i); 422 struct cpuinfo_x86 *o = &cpu_data(i);
411 423
412 if (c->phys_proc_id == o->phys_proc_id && 424 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
413 c->cpu_core_id == o->cpu_core_id) { 425 if (c->phys_proc_id == o->phys_proc_id &&
414 cpumask_set_cpu(i, cpu_sibling_mask(cpu)); 426 c->compute_unit_id == o->compute_unit_id)
415 cpumask_set_cpu(cpu, cpu_sibling_mask(i)); 427 link_thread_siblings(cpu, i);
416 cpumask_set_cpu(i, cpu_core_mask(cpu)); 428 } else if (c->phys_proc_id == o->phys_proc_id &&
417 cpumask_set_cpu(cpu, cpu_core_mask(i)); 429 c->cpu_core_id == o->cpu_core_id) {
418 cpumask_set_cpu(i, c->llc_shared_map); 430 link_thread_siblings(cpu, i);
419 cpumask_set_cpu(cpu, o->llc_shared_map);
420 } 431 }
421 } 432 }
422 } else { 433 } else {
@@ -1109,8 +1120,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1109 } 1120 }
1110 set_cpu_sibling_map(0); 1121 set_cpu_sibling_map(0);
1111 1122
1112 enable_IR_x2apic();
1113 default_setup_apic_routing();
1114 1123
1115 if (smp_sanity_check(max_cpus) < 0) { 1124 if (smp_sanity_check(max_cpus) < 0) {
1116 printk(KERN_INFO "SMP disabled\n"); 1125 printk(KERN_INFO "SMP disabled\n");
@@ -1118,6 +1127,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1118 goto out; 1127 goto out;
1119 } 1128 }
1120 1129
1130 default_setup_apic_routing();
1131
1121 preempt_disable(); 1132 preempt_disable();
1122 if (read_apic_id() != boot_cpu_physical_apicid) { 1133 if (read_apic_id() != boot_cpu_physical_apicid) {
1123 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1134 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
@@ -1383,11 +1394,88 @@ void play_dead_common(void)
1383 local_irq_disable(); 1394 local_irq_disable();
1384} 1395}
1385 1396
1397/*
1398 * We need to flush the caches before going to sleep, lest we have
1399 * dirty data in our caches when we come back up.
1400 */
1401static inline void mwait_play_dead(void)
1402{
1403 unsigned int eax, ebx, ecx, edx;
1404 unsigned int highest_cstate = 0;
1405 unsigned int highest_subcstate = 0;
1406 int i;
1407 void *mwait_ptr;
1408
1409 if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
1410 return;
1411 if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
1412 return;
1413 if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1414 return;
1415
1416 eax = CPUID_MWAIT_LEAF;
1417 ecx = 0;
1418 native_cpuid(&eax, &ebx, &ecx, &edx);
1419
1420 /*
1421 * eax will be 0 if EDX enumeration is not valid.
1422 * Initialized below to cstate, sub_cstate value when EDX is valid.
1423 */
1424 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
1425 eax = 0;
1426 } else {
1427 edx >>= MWAIT_SUBSTATE_SIZE;
1428 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
1429 if (edx & MWAIT_SUBSTATE_MASK) {
1430 highest_cstate = i;
1431 highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
1432 }
1433 }
1434 eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
1435 (highest_subcstate - 1);
1436 }
1437
1438 /*
1439 * This should be a memory location in a cache line which is
1440 * unlikely to be touched by other processors. The actual
1441 * content is immaterial as it is not actually modified in any way.
1442 */
1443 mwait_ptr = &current_thread_info()->flags;
1444
1445 wbinvd();
1446
1447 while (1) {
1448 /*
1449 * The CLFLUSH is a workaround for erratum AAI65 for
1450 * the Xeon 7400 series. It's not clear it is actually
1451 * needed, but it should be harmless in either case.
1452 * The WBINVD is insufficient due to the spurious-wakeup
1453 * case where we return around the loop.
1454 */
1455 clflush(mwait_ptr);
1456 __monitor(mwait_ptr, 0, 0);
1457 mb();
1458 __mwait(eax, 0);
1459 }
1460}
1461
1462static inline void hlt_play_dead(void)
1463{
1464 if (current_cpu_data.x86 >= 4)
1465 wbinvd();
1466
1467 while (1) {
1468 native_halt();
1469 }
1470}
1471
1386void native_play_dead(void) 1472void native_play_dead(void)
1387{ 1473{
1388 play_dead_common(); 1474 play_dead_common();
1389 tboot_shutdown(TB_SHUTDOWN_WFS); 1475 tboot_shutdown(TB_SHUTDOWN_WFS);
1390 wbinvd_halt(); 1476
1477 mwait_play_dead(); /* Only returns on failure */
1478 hlt_play_dead();
1391} 1479}
1392 1480
1393#else /* ... !CONFIG_HOTPLUG_CPU */ 1481#else /* ... !CONFIG_HOTPLUG_CPU */
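For the offlining path, mwait_play_dead() derives the deepest advertised C-state from CPUID leaf 5: EDX carries a 4-bit sub-state count per C-state, and the MWAIT hint placed in EAX is (highest C-state << 4) | (sub-states - 1). The same computation as a self-contained userspace sketch, assuming the usual constant values (MWAIT_SUBSTATE_SIZE = 4, MWAIT_SUBSTATE_MASK = 0xf, CPUID5_ECX_EXTENSIONS_SUPPORTED = 0x1) and a reasonably recent GCC or Clang for <cpuid.h>; it only prints the hint rather than executing MWAIT:

#include <stdio.h>
#include <cpuid.h>			/* __get_cpuid_count() */

#define CPUID_MWAIT_LEAF		5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED	0x1	/* assumed to match the kernel headers */
#define MWAIT_SUBSTATE_SIZE		4
#define MWAIT_SUBSTATE_MASK		0xf

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0, highest_subcstate = 0;

	if (!__get_cpuid_count(CPUID_MWAIT_LEAF, 0, &eax, &ebx, &ecx, &edx))
		return 1;			/* CPUID leaf 5 not available */

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
		eax = 0;			/* EDX enumeration not valid */
	} else {
		edx >>= MWAIT_SUBSTATE_SIZE;	/* shift out the first sub-state field */
		for (int i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
			if (edx & MWAIT_SUBSTATE_MASK) {
				highest_cstate = i;
				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
			}
		}
		/* Mirrors the kernel code: assumes at least one sub-state is advertised. */
		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		      (highest_subcstate - 1);
	}
	printf("MWAIT hint for deepest C-state: 0x%x\n", eax);
	return 0;
}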
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index d5e06624e34a..0b0cb5fede19 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -33,8 +33,8 @@ int kernel_execve(const char *filename,
33 const char *const envp[]) 33 const char *const envp[])
34{ 34{
35 long __res; 35 long __res;
36 asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" 36 asm volatile ("int $0x80"
37 : "=a" (__res) 37 : "=a" (__res)
38 : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); 38 : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
39 return __res; 39 return __res;
40} 40}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 60788dee0f8a..d43968503dd2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -776,21 +776,10 @@ asmlinkage void math_state_restore(void)
776} 776}
777EXPORT_SYMBOL_GPL(math_state_restore); 777EXPORT_SYMBOL_GPL(math_state_restore);
778 778
779#ifndef CONFIG_MATH_EMULATION
780void math_emulate(struct math_emu_info *info)
781{
782 printk(KERN_EMERG
783 "math-emulation not enabled and no coprocessor found.\n");
784 printk(KERN_EMERG "killing %s.\n", current->comm);
785 force_sig(SIGFPE, current);
786 schedule();
787}
788#endif /* CONFIG_MATH_EMULATION */
789
790dotraplinkage void __kprobes 779dotraplinkage void __kprobes
791do_device_not_available(struct pt_regs *regs, long error_code) 780do_device_not_available(struct pt_regs *regs, long error_code)
792{ 781{
793#ifdef CONFIG_X86_32 782#ifdef CONFIG_MATH_EMULATION
794 if (read_cr0() & X86_CR0_EM) { 783 if (read_cr0() & X86_CR0_EM) {
795 struct math_emu_info info = { }; 784 struct math_emu_info info = { };
796 785
@@ -798,12 +787,12 @@ do_device_not_available(struct pt_regs *regs, long error_code)
798 787
799 info.regs = regs; 788 info.regs = regs;
800 math_emulate(&info); 789 math_emulate(&info);
801 } else { 790 return;
802 math_state_restore(); /* interrupts still off */
803 conditional_sti(regs);
804 } 791 }
805#else 792#endif
806 math_state_restore(); 793 math_state_restore(); /* interrupts still off */
794#ifdef CONFIG_X86_32
795 conditional_sti(regs);
807#endif 796#endif
808} 797}
809 798
@@ -881,18 +870,6 @@ void __init trap_init(void)
881#endif 870#endif
882 871
883#ifdef CONFIG_X86_32 872#ifdef CONFIG_X86_32
884 if (cpu_has_fxsr) {
885 printk(KERN_INFO "Enabling fast FPU save and restore... ");
886 set_in_cr4(X86_CR4_OSFXSR);
887 printk("done.\n");
888 }
889 if (cpu_has_xmm) {
890 printk(KERN_INFO
891 "Enabling unmasked SIMD FPU exception support... ");
892 set_in_cr4(X86_CR4_OSXMMEXCPT);
893 printk("done.\n");
894 }
895
896 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 873 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
897 set_bit(SYSCALL_VECTOR, used_vectors); 874 set_bit(SYSCALL_VECTOR, used_vectors);
898#endif 875#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 26a863a9c2a8..0c40d8b72416 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,10 +104,14 @@ int __init notsc_setup(char *str)
104 104
105__setup("notsc", notsc_setup); 105__setup("notsc", notsc_setup);
106 106
107static int no_sched_irq_time;
108
107static int __init tsc_setup(char *str) 109static int __init tsc_setup(char *str)
108{ 110{
109 if (!strcmp(str, "reliable")) 111 if (!strcmp(str, "reliable"))
110 tsc_clocksource_reliable = 1; 112 tsc_clocksource_reliable = 1;
113 if (!strncmp(str, "noirqtime", 9))
114 no_sched_irq_time = 1;
111 return 1; 115 return 1;
112} 116}
113 117
@@ -801,6 +805,7 @@ void mark_tsc_unstable(char *reason)
801 if (!tsc_unstable) { 805 if (!tsc_unstable) {
802 tsc_unstable = 1; 806 tsc_unstable = 1;
803 sched_clock_stable = 0; 807 sched_clock_stable = 0;
808 disable_sched_clock_irqtime();
804 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); 809 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
805 /* Change only the rating, when not registered */ 810 /* Change only the rating, when not registered */
806 if (clocksource_tsc.mult) 811 if (clocksource_tsc.mult)
@@ -892,60 +897,6 @@ static void __init init_tsc_clocksource(void)
892 clocksource_register_khz(&clocksource_tsc, tsc_khz); 897 clocksource_register_khz(&clocksource_tsc, tsc_khz);
893} 898}
894 899
895#ifdef CONFIG_X86_64
896/*
897 * calibrate_cpu is used on systems with fixed rate TSCs to determine
898 * processor frequency
899 */
900#define TICK_COUNT 100000000
901static unsigned long __init calibrate_cpu(void)
902{
903 int tsc_start, tsc_now;
904 int i, no_ctr_free;
905 unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
906 unsigned long flags;
907
908 for (i = 0; i < 4; i++)
909 if (avail_to_resrv_perfctr_nmi_bit(i))
910 break;
911 no_ctr_free = (i == 4);
912 if (no_ctr_free) {
913 WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
914 "cpu_khz value may be incorrect.\n");
915 i = 3;
916 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
917 wrmsrl(MSR_K7_EVNTSEL3, 0);
918 rdmsrl(MSR_K7_PERFCTR3, pmc3);
919 } else {
920 reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
921 reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
922 }
923 local_irq_save(flags);
924 /* start measuring cycles, incrementing from 0 */
925 wrmsrl(MSR_K7_PERFCTR0 + i, 0);
926 wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
927 rdtscl(tsc_start);
928 do {
929 rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
930 tsc_now = get_cycles();
931 } while ((tsc_now - tsc_start) < TICK_COUNT);
932
933 local_irq_restore(flags);
934 if (no_ctr_free) {
935 wrmsrl(MSR_K7_EVNTSEL3, 0);
936 wrmsrl(MSR_K7_PERFCTR3, pmc3);
937 wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
938 } else {
939 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
940 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
941 }
942
943 return pmc_now * tsc_khz / (tsc_now - tsc_start);
944}
945#else
946static inline unsigned long calibrate_cpu(void) { return cpu_khz; }
947#endif
948
949void __init tsc_init(void) 900void __init tsc_init(void)
950{ 901{
951 u64 lpj; 902 u64 lpj;
@@ -964,10 +915,6 @@ void __init tsc_init(void)
964 return; 915 return;
965 } 916 }
966 917
967 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
968 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
969 cpu_khz = calibrate_cpu();
970
971 printk("Detected %lu.%03lu MHz processor.\n", 918 printk("Detected %lu.%03lu MHz processor.\n",
972 (unsigned long)cpu_khz / 1000, 919 (unsigned long)cpu_khz / 1000,
973 (unsigned long)cpu_khz % 1000); 920 (unsigned long)cpu_khz % 1000);
@@ -987,6 +934,9 @@ void __init tsc_init(void)
987 /* now allow native_sched_clock() to use rdtsc */ 934 /* now allow native_sched_clock() to use rdtsc */
988 tsc_disabled = 0; 935 tsc_disabled = 0;
989 936
937 if (!no_sched_irq_time)
938 enable_sched_clock_irqtime();
939
990 lpj = ((u64)tsc_khz * 1000); 940 lpj = ((u64)tsc_khz * 1000);
991 do_div(lpj, HZ); 941 do_div(lpj, HZ);
992 lpj_fine = lpj; 942 lpj_fine = lpj;
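Two related knobs appear in this file: the tsc= parameter now also accepts noirqtime, which skips the enable_sched_clock_irqtime() call, and mark_tsc_unstable() turns IRQ-time accounting back off if the TSC later proves unreliable. Illustrative boot parameters:

	tsc=noirqtime		# do not use the TSC for fine-grained IRQ-time accounting
	tsc=reliable		# pre-existing option, still parsed by the same handler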
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 1132129db792..7b24460917d5 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -28,34 +28,21 @@ struct uv_irq_2_mmr_pnode{
28static spinlock_t uv_irq_lock; 28static spinlock_t uv_irq_lock;
29static struct rb_root uv_irq_root; 29static struct rb_root uv_irq_root;
30 30
31static int uv_set_irq_affinity(unsigned int, const struct cpumask *); 31static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
32 32
33static void uv_noop(unsigned int irq) 33static void uv_noop(struct irq_data *data) { }
34{
35}
36
37static unsigned int uv_noop_ret(unsigned int irq)
38{
39 return 0;
40}
41 34
42static void uv_ack_apic(unsigned int irq) 35static void uv_ack_apic(struct irq_data *data)
43{ 36{
44 ack_APIC_irq(); 37 ack_APIC_irq();
45} 38}
46 39
47static struct irq_chip uv_irq_chip = { 40static struct irq_chip uv_irq_chip = {
48 .name = "UV-CORE", 41 .name = "UV-CORE",
49 .startup = uv_noop_ret, 42 .irq_mask = uv_noop,
50 .shutdown = uv_noop, 43 .irq_unmask = uv_noop,
51 .enable = uv_noop, 44 .irq_eoi = uv_ack_apic,
52 .disable = uv_noop, 45 .irq_set_affinity = uv_set_irq_affinity,
53 .ack = uv_noop,
54 .mask = uv_noop,
55 .unmask = uv_noop,
56 .eoi = uv_ack_apic,
57 .end = uv_noop,
58 .set_affinity = uv_set_irq_affinity,
59}; 46};
60 47
61/* 48/*
@@ -144,26 +131,22 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
144 unsigned long mmr_offset, int limit) 131 unsigned long mmr_offset, int limit)
145{ 132{
146 const struct cpumask *eligible_cpu = cpumask_of(cpu); 133 const struct cpumask *eligible_cpu = cpumask_of(cpu);
147 struct irq_desc *desc = irq_to_desc(irq); 134 struct irq_cfg *cfg = get_irq_chip_data(irq);
148 struct irq_cfg *cfg;
149 int mmr_pnode;
150 unsigned long mmr_value; 135 unsigned long mmr_value;
151 struct uv_IO_APIC_route_entry *entry; 136 struct uv_IO_APIC_route_entry *entry;
152 int err; 137 int mmr_pnode, err;
153 138
154 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != 139 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
155 sizeof(unsigned long)); 140 sizeof(unsigned long));
156 141
157 cfg = irq_cfg(irq);
158
159 err = assign_irq_vector(irq, cfg, eligible_cpu); 142 err = assign_irq_vector(irq, cfg, eligible_cpu);
160 if (err != 0) 143 if (err != 0)
161 return err; 144 return err;
162 145
163 if (limit == UV_AFFINITY_CPU) 146 if (limit == UV_AFFINITY_CPU)
164 desc->status |= IRQ_NO_BALANCING; 147 irq_set_status_flags(irq, IRQ_NO_BALANCING);
165 else 148 else
166 desc->status |= IRQ_MOVE_PCNTXT; 149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
167 150
168 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, 151 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
169 irq_name); 152 irq_name);
@@ -206,17 +189,17 @@ static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
206 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 189 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
207} 190}
208 191
209static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) 192static int
193uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
194 bool force)
210{ 195{
211 struct irq_desc *desc = irq_to_desc(irq); 196 struct irq_cfg *cfg = data->chip_data;
212 struct irq_cfg *cfg = desc->chip_data;
213 unsigned int dest; 197 unsigned int dest;
214 unsigned long mmr_value; 198 unsigned long mmr_value, mmr_offset;
215 struct uv_IO_APIC_route_entry *entry; 199 struct uv_IO_APIC_route_entry *entry;
216 unsigned long mmr_offset;
217 int mmr_pnode; 200 int mmr_pnode;
218 201
219 if (set_desc_affinity(desc, mask, &dest)) 202 if (__ioapic_set_affinity(data, mask, &dest))
220 return -1; 203 return -1;
221 204
222 mmr_value = 0; 205 mmr_value = 0;
@@ -231,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
231 entry->dest = dest; 214 entry->dest = dest;
232 215
233 /* Get previously stored MMR and pnode of hub sourcing interrupts */ 216 /* Get previously stored MMR and pnode of hub sourcing interrupts */
234 if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) 217 if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
235 return -1; 218 return -1;
236 219
237 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 220 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
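Like the other interrupt-chip conversions in this series, the UV chip moves from the legacy unsigned-int-irq callbacks to the struct irq_data based ones: mask/unmask/eoi/set_affinity receive the irq_data, and per-IRQ state comes from get_irq_chip_data() rather than irq_to_desc(). The shape of the result, reduced to a kernel-context sketch (not compilable on its own; names other than the field names used in this hunk are illustrative):

/* Sketch of the new-style irq_chip wiring used by this conversion. */
static void example_mask(struct irq_data *data)   { /* no-op for this chip */ }
static void example_unmask(struct irq_data *data) { /* no-op for this chip */ }

static void example_eoi(struct irq_data *data)
{
	ack_APIC_irq();			/* data->irq identifies the line if needed */
}

static int example_set_affinity(struct irq_data *data,
				const struct cpumask *mask, bool force)
{
	unsigned int dest;

	if (__ioapic_set_affinity(data, mask, &dest))	/* helper used above */
		return -1;
	/* reprogram the hardware routing for data->irq here */
	return 0;
}

static struct irq_chip example_chip = {
	.name			= "EXAMPLE",
	.irq_mask		= example_mask,
	.irq_unmask		= example_unmask,
	.irq_eoi		= example_eoi,
	.irq_set_affinity	= example_set_affinity,
};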
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index e680ea52db9b..3371bd053b89 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -66,10 +66,7 @@ static void __init visws_time_init(void)
66} 66}
67 67
68/* Replaces the default init_ISA_irqs in the generic setup */ 68/* Replaces the default init_ISA_irqs in the generic setup */
69static void __init visws_pre_intr_init(void) 69static void __init visws_pre_intr_init(void);
70{
71 init_VISWS_APIC_irqs();
72}
73 70
74/* Quirk for machine specific memory setup. */ 71/* Quirk for machine specific memory setup. */
75 72
@@ -429,67 +426,34 @@ static int is_co_apic(unsigned int irq)
429/* 426/*
430 * This is the SGI Cobalt (IO-)APIC: 427 * This is the SGI Cobalt (IO-)APIC:
431 */ 428 */
432 429static void enable_cobalt_irq(struct irq_data *data)
433static void enable_cobalt_irq(unsigned int irq)
434{ 430{
435 co_apic_set(is_co_apic(irq), irq); 431 co_apic_set(is_co_apic(data->irq), data->irq);
436} 432}
437 433
438static void disable_cobalt_irq(unsigned int irq) 434static void disable_cobalt_irq(struct irq_data *data)
439{ 435{
440 int entry = is_co_apic(irq); 436 int entry = is_co_apic(data->irq);
441 437
442 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); 438 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
443 co_apic_read(CO_APIC_LO(entry)); 439 co_apic_read(CO_APIC_LO(entry));
444} 440}
445 441
446/* 442static void ack_cobalt_irq(struct irq_data *data)
447 * "irq" really just serves to identify the device. Here is where we
448 * map this to the Cobalt APIC entry where it's physically wired.
449 * This is called via request_irq -> setup_irq -> irq_desc->startup()
450 */
451static unsigned int startup_cobalt_irq(unsigned int irq)
452{ 443{
453 unsigned long flags; 444 unsigned long flags;
454 struct irq_desc *desc = irq_to_desc(irq);
455 445
456 spin_lock_irqsave(&cobalt_lock, flags); 446 spin_lock_irqsave(&cobalt_lock, flags);
457 if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) 447 disable_cobalt_irq(data);
458 desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
459 enable_cobalt_irq(irq);
460 spin_unlock_irqrestore(&cobalt_lock, flags);
461 return 0;
462}
463
464static void ack_cobalt_irq(unsigned int irq)
465{
466 unsigned long flags;
467
468 spin_lock_irqsave(&cobalt_lock, flags);
469 disable_cobalt_irq(irq);
470 apic_write(APIC_EOI, APIC_EIO_ACK); 448 apic_write(APIC_EOI, APIC_EIO_ACK);
471 spin_unlock_irqrestore(&cobalt_lock, flags); 449 spin_unlock_irqrestore(&cobalt_lock, flags);
472} 450}
473 451
474static void end_cobalt_irq(unsigned int irq)
475{
476 unsigned long flags;
477 struct irq_desc *desc = irq_to_desc(irq);
478
479 spin_lock_irqsave(&cobalt_lock, flags);
480 if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
481 enable_cobalt_irq(irq);
482 spin_unlock_irqrestore(&cobalt_lock, flags);
483}
484
485static struct irq_chip cobalt_irq_type = { 452static struct irq_chip cobalt_irq_type = {
486 .name = "Cobalt-APIC", 453 .name = "Cobalt-APIC",
487 .startup = startup_cobalt_irq, 454 .irq_enable = enable_cobalt_irq,
488 .shutdown = disable_cobalt_irq, 455 .irq_disable = disable_cobalt_irq,
489 .enable = enable_cobalt_irq, 456 .irq_ack = ack_cobalt_irq,
490 .disable = disable_cobalt_irq,
491 .ack = ack_cobalt_irq,
492 .end = end_cobalt_irq,
493}; 457};
494 458
495 459
@@ -503,35 +467,34 @@ static struct irq_chip cobalt_irq_type = {
503 * interrupt controller type, and through a special virtual interrupt- 467 * interrupt controller type, and through a special virtual interrupt-
504 * controller. Device drivers only see the virtual interrupt sources. 468 * controller. Device drivers only see the virtual interrupt sources.
505 */ 469 */
506static unsigned int startup_piix4_master_irq(unsigned int irq) 470static unsigned int startup_piix4_master_irq(struct irq_data *data)
507{ 471{
508 legacy_pic->init(0); 472 legacy_pic->init(0);
509 473 enable_cobalt_irq(data);
510 return startup_cobalt_irq(irq);
511} 474}
512 475
513static void end_piix4_master_irq(unsigned int irq) 476static void end_piix4_master_irq(struct irq_data *data)
514{ 477{
515 unsigned long flags; 478 unsigned long flags;
516 479
517 spin_lock_irqsave(&cobalt_lock, flags); 480 spin_lock_irqsave(&cobalt_lock, flags);
518 enable_cobalt_irq(irq); 481 enable_cobalt_irq(data);
519 spin_unlock_irqrestore(&cobalt_lock, flags); 482 spin_unlock_irqrestore(&cobalt_lock, flags);
520} 483}
521 484
522static struct irq_chip piix4_master_irq_type = { 485static struct irq_chip piix4_master_irq_type = {
523 .name = "PIIX4-master", 486 .name = "PIIX4-master",
524 .startup = startup_piix4_master_irq, 487 .irq_startup = startup_piix4_master_irq,
525 .ack = ack_cobalt_irq, 488 .irq_ack = ack_cobalt_irq,
526 .end = end_piix4_master_irq,
527}; 489};
528 490
491static void pii4_mask(struct irq_data *data) { }
529 492
530static struct irq_chip piix4_virtual_irq_type = { 493static struct irq_chip piix4_virtual_irq_type = {
531 .name = "PIIX4-virtual", 494 .name = "PIIX4-virtual",
495 .mask = pii4_mask,
532}; 496};
533 497
534
535/* 498/*
536 * PIIX4-8259 master/virtual functions to handle interrupt requests 499 * PIIX4-8259 master/virtual functions to handle interrupt requests
537 * from legacy devices: floppy, parallel, serial, rtc. 500 * from legacy devices: floppy, parallel, serial, rtc.
@@ -549,9 +512,8 @@ static struct irq_chip piix4_virtual_irq_type = {
549 */ 512 */
550static irqreturn_t piix4_master_intr(int irq, void *dev_id) 513static irqreturn_t piix4_master_intr(int irq, void *dev_id)
551{ 514{
552 int realirq;
553 struct irq_desc *desc;
554 unsigned long flags; 515 unsigned long flags;
516 int realirq;
555 517
556 raw_spin_lock_irqsave(&i8259A_lock, flags); 518 raw_spin_lock_irqsave(&i8259A_lock, flags);
557 519
@@ -592,18 +554,10 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
592 554
593 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 555 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
594 556
595 desc = irq_to_desc(realirq);
596
597 /* 557 /*
598 * handle this 'virtual interrupt' as a Cobalt one now. 558 * handle this 'virtual interrupt' as a Cobalt one now.
599 */ 559 */
600 kstat_incr_irqs_this_cpu(realirq, desc); 560 generic_handle_irq(realirq);
601
602 if (likely(desc->action != NULL))
603 handle_IRQ_event(realirq, desc->action);
604
605 if (!(desc->status & IRQ_DISABLED))
606 legacy_pic->chip->unmask(realirq);
607 561
608 return IRQ_HANDLED; 562 return IRQ_HANDLED;
609 563
@@ -624,41 +578,35 @@ static struct irqaction cascade_action = {
624 578
625static inline void set_piix4_virtual_irq_type(void) 579static inline void set_piix4_virtual_irq_type(void)
626{ 580{
627 piix4_virtual_irq_type.shutdown = i8259A_chip.mask;
628 piix4_virtual_irq_type.enable = i8259A_chip.unmask; 581 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
629 piix4_virtual_irq_type.disable = i8259A_chip.mask; 582 piix4_virtual_irq_type.disable = i8259A_chip.mask;
583 piix4_virtual_irq_type.unmask = i8259A_chip.unmask;
630} 584}
631 585
632void init_VISWS_APIC_irqs(void) 586static void __init visws_pre_intr_init(void)
633{ 587{
634 int i; 588 int i;
635 589
636 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { 590 set_piix4_virtual_irq_type();
637 struct irq_desc *desc = irq_to_desc(i);
638
639 desc->status = IRQ_DISABLED;
640 desc->action = 0;
641 desc->depth = 1;
642 591
643 if (i == 0) { 592 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
644 desc->chip = &cobalt_irq_type; 593 struct irq_chip *chip = NULL;
645 } 594
646 else if (i == CO_IRQ_IDE0) { 595 if (i == 0)
647 desc->chip = &cobalt_irq_type; 596 chip = &cobalt_irq_type;
648 } 597 else if (i == CO_IRQ_IDE0)
649 else if (i == CO_IRQ_IDE1) { 598 chip = &cobalt_irq_type;
650 desc->chip = &cobalt_irq_type; 599 else if (i == CO_IRQ_IDE1)
651 } 600 chip = &cobalt_irq_type;
652 else if (i == CO_IRQ_8259) { 601 else if (i == CO_IRQ_8259)
653 desc->chip = &piix4_master_irq_type; 602 chip = &piix4_master_irq_type;
654 } 603 else if (i < CO_IRQ_APIC0)
655 else if (i < CO_IRQ_APIC0) { 604 chip = &piix4_virtual_irq_type;
656 set_piix4_virtual_irq_type(); 605 else if (IS_CO_APIC(i))
657 desc->chip = &piix4_virtual_irq_type; 606 chip = &cobalt_irq_type;
658 } 607
659 else if (IS_CO_APIC(i)) { 608 if (chip)
660 desc->chip = &cobalt_irq_type; 609 set_irq_chip(i, chip);
661 }
662 } 610 }
663 611
664 setup_irq(CO_IRQ_8259, &master_action); 612 setup_irq(CO_IRQ_8259, &master_action);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
deleted file mode 100644
index ce9fbacb7526..000000000000
--- a/arch/x86/kernel/vmi_32.c
+++ /dev/null
@@ -1,893 +0,0 @@
1/*
2 * VMI specific paravirt-ops implementation
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to zach@vmware.com
22 *
23 */
24
25#include <linux/module.h>
26#include <linux/cpu.h>
27#include <linux/bootmem.h>
28#include <linux/mm.h>
29#include <linux/highmem.h>
30#include <linux/sched.h>
31#include <linux/gfp.h>
32#include <asm/vmi.h>
33#include <asm/io.h>
34#include <asm/fixmap.h>
35#include <asm/apicdef.h>
36#include <asm/apic.h>
37#include <asm/pgalloc.h>
38#include <asm/processor.h>
39#include <asm/timer.h>
40#include <asm/vmi_time.h>
41#include <asm/kmap_types.h>
42#include <asm/setup.h>
43
44/* Convenient for calling VMI functions indirectly in the ROM */
45typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
46typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int);
47
48#define call_vrom_func(rom,func) \
49 (((VROMFUNC *)(rom->func))())
50
51#define call_vrom_long_func(rom,func,arg) \
52 (((VROMLONGFUNC *)(rom->func)) (arg))
53
54static struct vrom_header *vmi_rom;
55static int disable_pge;
56static int disable_pse;
57static int disable_sep;
58static int disable_tsc;
59static int disable_mtrr;
60static int disable_noidle;
61static int disable_vmi_timer;
62
63/* Cached VMI operations */
64static struct {
65 void (*cpuid)(void /* non-c */);
66 void (*_set_ldt)(u32 selector);
67 void (*set_tr)(u32 selector);
68 void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
69 void (*write_gdt_entry)(struct desc_struct *, int, u32, u32);
70 void (*write_ldt_entry)(struct desc_struct *, int, u32, u32);
71 void (*set_kernel_stack)(u32 selector, u32 sp0);
72 void (*allocate_page)(u32, u32, u32, u32, u32);
73 void (*release_page)(u32, u32);
74 void (*set_pte)(pte_t, pte_t *, unsigned);
75 void (*update_pte)(pte_t *, unsigned);
76 void (*set_linear_mapping)(int, void *, u32, u32);
77 void (*_flush_tlb)(int);
78 void (*set_initial_ap_state)(int, int);
79 void (*halt)(void);
80 void (*set_lazy_mode)(int mode);
81} vmi_ops;
82
83/* Cached VMI operations */
84struct vmi_timer_ops vmi_timer_ops;
85
86/*
87 * VMI patching routines.
88 */
89#define MNEM_CALL 0xe8
90#define MNEM_JMP 0xe9
91#define MNEM_RET 0xc3
92
93#define IRQ_PATCH_INT_MASK 0
94#define IRQ_PATCH_DISABLE 5
95
96static inline void patch_offset(void *insnbuf,
97 unsigned long ip, unsigned long dest)
98{
99 *(unsigned long *)(insnbuf+1) = dest-ip-5;
100}
101
102static unsigned patch_internal(int call, unsigned len, void *insnbuf,
103 unsigned long ip)
104{
105 u64 reloc;
106 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
107 reloc = call_vrom_long_func(vmi_rom, get_reloc, call);
108 switch(rel->type) {
109 case VMI_RELOCATION_CALL_REL:
110 BUG_ON(len < 5);
111 *(char *)insnbuf = MNEM_CALL;
112 patch_offset(insnbuf, ip, (unsigned long)rel->eip);
113 return 5;
114
115 case VMI_RELOCATION_JUMP_REL:
116 BUG_ON(len < 5);
117 *(char *)insnbuf = MNEM_JMP;
118 patch_offset(insnbuf, ip, (unsigned long)rel->eip);
119 return 5;
120
121 case VMI_RELOCATION_NOP:
122 /* obliterate the whole thing */
123 return 0;
124
125 case VMI_RELOCATION_NONE:
126 /* leave native code in place */
127 break;
128
129 default:
130 BUG();
131 }
132 return len;
133}
134
135/*
136 * Apply patch if appropriate, return length of new instruction
137 * sequence. The callee does nop padding for us.
138 */
139static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
140 unsigned long ip, unsigned len)
141{
142 switch (type) {
143 case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
144 return patch_internal(VMI_CALL_DisableInterrupts, len,
145 insns, ip);
146 case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
147 return patch_internal(VMI_CALL_EnableInterrupts, len,
148 insns, ip);
149 case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
150 return patch_internal(VMI_CALL_SetInterruptMask, len,
151 insns, ip);
152 case PARAVIRT_PATCH(pv_irq_ops.save_fl):
153 return patch_internal(VMI_CALL_GetInterruptMask, len,
154 insns, ip);
155 case PARAVIRT_PATCH(pv_cpu_ops.iret):
156 return patch_internal(VMI_CALL_IRET, len, insns, ip);
157 case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
158 return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
159 default:
160 break;
161 }
162 return len;
163}
164
165/* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
166static void vmi_cpuid(unsigned int *ax, unsigned int *bx,
167 unsigned int *cx, unsigned int *dx)
168{
169 int override = 0;
170 if (*ax == 1)
171 override = 1;
172 asm volatile ("call *%6"
173 : "=a" (*ax),
174 "=b" (*bx),
175 "=c" (*cx),
176 "=d" (*dx)
177 : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid));
178 if (override) {
179 if (disable_pse)
180 *dx &= ~X86_FEATURE_PSE;
181 if (disable_pge)
182 *dx &= ~X86_FEATURE_PGE;
183 if (disable_sep)
184 *dx &= ~X86_FEATURE_SEP;
185 if (disable_tsc)
186 *dx &= ~X86_FEATURE_TSC;
187 if (disable_mtrr)
188 *dx &= ~X86_FEATURE_MTRR;
189 }
190}
191
192static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
193{
194 if (gdt[nr].a != new->a || gdt[nr].b != new->b)
195 write_gdt_entry(gdt, nr, new, 0);
196}
197
198static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
199{
200 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
201 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]);
202 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]);
203 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]);
204}
205
206static void vmi_set_ldt(const void *addr, unsigned entries)
207{
208 unsigned cpu = smp_processor_id();
209 struct desc_struct desc;
210
211 pack_descriptor(&desc, (unsigned long)addr,
212 entries * sizeof(struct desc_struct) - 1,
213 DESC_LDT, 0);
214 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT);
215 vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
216}
217
218static void vmi_set_tr(void)
219{
220 vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
221}
222
223static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
224{
225 u32 *idt_entry = (u32 *)g;
226 vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
227}
228
229static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
230 const void *desc, int type)
231{
232 u32 *gdt_entry = (u32 *)desc;
233 vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
234}
235
236static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
237 const void *desc)
238{
239 u32 *ldt_entry = (u32 *)desc;
240 vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
241}
242
243static void vmi_load_sp0(struct tss_struct *tss,
244 struct thread_struct *thread)
245{
246 tss->x86_tss.sp0 = thread->sp0;
247
248 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
249 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
250 tss->x86_tss.ss1 = thread->sysenter_cs;
251 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
252 }
253 vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0);
254}
255
256static void vmi_flush_tlb_user(void)
257{
258 vmi_ops._flush_tlb(VMI_FLUSH_TLB);
259}
260
261static void vmi_flush_tlb_kernel(void)
262{
263 vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
264}
265
266/* Stub to do nothing at all; used for delays and unimplemented calls */
267static void vmi_nop(void)
268{
269}
270
271static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
272{
273 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
274}
275
276static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
277{
278 /*
279 * This call comes in very early, before mem_map is setup.
280 * It is called only for swapper_pg_dir, which already has
281 * data on it.
282 */
283 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
284}
285
286static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
287{
288 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
289}
290
291static void vmi_release_pte(unsigned long pfn)
292{
293 vmi_ops.release_page(pfn, VMI_PAGE_L1);
294}
295
296static void vmi_release_pmd(unsigned long pfn)
297{
298 vmi_ops.release_page(pfn, VMI_PAGE_L2);
299}
300
301/*
302 * We use the pgd_free hook for releasing the pgd page:
303 */
304static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
305{
306 unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
307
308 vmi_ops.release_page(pfn, VMI_PAGE_L2);
309}
310
311/*
312 * Helper macros for MMU update flags. We can defer updates until a flush
313 * or page invalidation only if the update is to the current address space
314 * (otherwise, there is no flush). We must check against init_mm, since
315 * this could be a kernel update, which usually passes init_mm, although
316 * sometimes this check can be skipped if we know the particular function
317 * is only called on user mode PTEs. We could change the kernel to pass
318 * current->active_mm here, but in particular, I was unsure if changing
319 * mm/highmem.c to do this would still be correct on other architectures.
320 */
321#define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \
322 (!mustbeuser && (mm) == &init_mm))
323#define vmi_flags_addr(mm, addr, level, user) \
324 ((level) | (is_current_as(mm, user) ? \
325 (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
326#define vmi_flags_addr_defer(mm, addr, level, user) \
327 ((level) | (is_current_as(mm, user) ? \
328 (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
329
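/*
 * For example, updating a PTE for a user address in current->active_mm
 * expands vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0) to
 * VMI_PAGE_PT | VMI_PAGE_CURRENT_AS | (addr & VMI_PAGE_VA_MASK), and
 * the _defer variant additionally sets VMI_PAGE_DEFER; for a foreign
 * mm only VMI_PAGE_PT is passed, since there is no later flush that
 * could pick up a deferred update.
 */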
330static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
331{
332 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
333}
334
335static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
336{
337 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
338}
339
340static void vmi_set_pte(pte_t *ptep, pte_t pte)
341{
342 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */
343 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
344}
345
346static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
347{
348 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
349}
350
351static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
352{
353#ifdef CONFIG_X86_PAE
354 const pte_t pte = { .pte = pmdval.pmd };
355#else
356 const pte_t pte = { pmdval.pud.pgd.pgd };
357#endif
358 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
359}
360
361#ifdef CONFIG_X86_PAE
362
363static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
364{
365 /*
366 * XXX This is called from set_pmd_pte, but at both PT
367 * and PD layers so the VMI_PAGE_PT flag is wrong. But
368 * it is only called for large page mapping changes,
 369	 * the Xen backend doesn't support large pages, and the
370 * ESX backend doesn't depend on the flag.
371 */
372 set_64bit((unsigned long long *)ptep,pte_val(pteval));
373 vmi_ops.update_pte(ptep, VMI_PAGE_PT);
374}
375
376static void vmi_set_pud(pud_t *pudp, pud_t pudval)
377{
378 /* Um, eww */
379 const pte_t pte = { .pte = pudval.pgd.pgd };
380 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
381}
382
383static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
384{
385 const pte_t pte = { .pte = 0 };
386 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
387}
388
389static void vmi_pmd_clear(pmd_t *pmd)
390{
391 const pte_t pte = { .pte = 0 };
392 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
393}
394#endif
395
396#ifdef CONFIG_SMP
397static void __devinit
398vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
399 unsigned long start_esp)
400{
401 struct vmi_ap_state ap;
402
403 /* Default everything to zero. This is fine for most GPRs. */
404 memset(&ap, 0, sizeof(struct vmi_ap_state));
405
406 ap.gdtr_limit = GDT_SIZE - 1;
407 ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid);
408
409 ap.idtr_limit = IDT_ENTRIES * 8 - 1;
410 ap.idtr_base = (unsigned long) idt_table;
411
412 ap.ldtr = 0;
413
414 ap.cs = __KERNEL_CS;
415 ap.eip = (unsigned long) start_eip;
416 ap.ss = __KERNEL_DS;
417 ap.esp = (unsigned long) start_esp;
418
419 ap.ds = __USER_DS;
420 ap.es = __USER_DS;
421 ap.fs = __KERNEL_PERCPU;
422 ap.gs = __KERNEL_STACK_CANARY;
423
424 ap.eflags = 0;
425
426#ifdef CONFIG_X86_PAE
427 /* efer should match BSP efer. */
428 if (cpu_has_nx) {
429 unsigned l, h;
430 rdmsr(MSR_EFER, l, h);
431 ap.efer = (unsigned long long) h << 32 | l;
432 }
433#endif
434
435 ap.cr3 = __pa(swapper_pg_dir);
436 /* Protected mode, paging, AM, WP, NE, MP. */
437 ap.cr0 = 0x80050023;
438 ap.cr4 = mmu_cr4_features;
439 vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid);
440}
441#endif
442
443static void vmi_start_context_switch(struct task_struct *prev)
444{
445 paravirt_start_context_switch(prev);
446 vmi_ops.set_lazy_mode(2);
447}
448
449static void vmi_end_context_switch(struct task_struct *next)
450{
451 vmi_ops.set_lazy_mode(0);
452 paravirt_end_context_switch(next);
453}
454
455static void vmi_enter_lazy_mmu(void)
456{
457 paravirt_enter_lazy_mmu();
458 vmi_ops.set_lazy_mode(1);
459}
460
461static void vmi_leave_lazy_mmu(void)
462{
463 vmi_ops.set_lazy_mode(0);
464 paravirt_leave_lazy_mmu();
465}
466
467static inline int __init check_vmi_rom(struct vrom_header *rom)
468{
469 struct pci_header *pci;
470 struct pnp_header *pnp;
471 const char *manufacturer = "UNKNOWN";
472 const char *product = "UNKNOWN";
473 const char *license = "unspecified";
474
475 if (rom->rom_signature != 0xaa55)
476 return 0;
477 if (rom->vrom_signature != VMI_SIGNATURE)
478 return 0;
479 if (rom->api_version_maj != VMI_API_REV_MAJOR ||
480 rom->api_version_min+1 < VMI_API_REV_MINOR+1) {
481 printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n",
482 rom->api_version_maj,
483 rom->api_version_min);
484 return 0;
485 }
486
487 /*
488 * Relying on the VMI_SIGNATURE field is not 100% safe, so check
489 * the PCI header and device type to make sure this is really a
490 * VMI device.
491 */
492 if (!rom->pci_header_offs) {
493 printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n");
494 return 0;
495 }
496
497 pci = (struct pci_header *)((char *)rom+rom->pci_header_offs);
498 if (pci->vendorID != PCI_VENDOR_ID_VMWARE ||
499 pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) {
500 /* Allow it to run... anyways, but warn */
501 printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n");
502 }
503
504 if (rom->pnp_header_offs) {
505 pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs);
506 if (pnp->manufacturer_offset)
507 manufacturer = (const char *)rom+pnp->manufacturer_offset;
508 if (pnp->product_offset)
509 product = (const char *)rom+pnp->product_offset;
510 }
511
512 if (rom->license_offs)
513 license = (char *)rom+rom->license_offs;
514
515 printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n",
516 manufacturer, product,
517 rom->api_version_maj, rom->api_version_min,
518 pci->rom_version_maj, pci->rom_version_min);
519
520 /* Don't allow BSD/MIT here for now because we don't want to end up
521 with any binary only shim layers */
522 if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) {
523 printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n",
524 license);
525 return 0;
526 }
527
528 return 1;
529}
530
531/*
532 * Probe for the VMI option ROM
533 */
534static inline int __init probe_vmi_rom(void)
535{
536 unsigned long base;
537
538 /* VMI ROM is in option ROM area, check signature */
539 for (base = 0xC0000; base < 0xE0000; base += 2048) {
540 struct vrom_header *romstart;
541 romstart = (struct vrom_header *)isa_bus_to_virt(base);
542 if (check_vmi_rom(romstart)) {
543 vmi_rom = romstart;
544 return 1;
545 }
546 }
547 return 0;
548}
549
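/*
 * The window scanned above is the legacy option-ROM area: stepping in
 * 2 KB increments from 0xC0000 up to (but not including) 0xE0000, at
 * most (0xE0000 - 0xC0000) / 2048 = 64 candidate headers are checked
 * for the 0xaa55 and VMI signatures.
 */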
550/*
551 * VMI setup common to all processors
552 */
553void vmi_bringup(void)
554{
555 /* We must establish the lowmem mapping for MMU ops to work */
556 if (vmi_ops.set_linear_mapping)
557 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
558}
559
560/*
561 * Return a pointer to a VMI function or NULL if unimplemented
562 */
563static void *vmi_get_function(int vmicall)
564{
565 u64 reloc;
566 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
567 reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall);
568 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);
569 if (rel->type == VMI_RELOCATION_CALL_REL)
570 return (void *)rel->eip;
571 else
572 return NULL;
573}
574
575/*
576 * Helper macro for making the VMI paravirt-ops fill code readable.
577 * For unimplemented operations, fall back to default, unless nop
578 * is returned by the ROM.
579 */
580#define para_fill(opname, vmicall) \
581do { \
582 reloc = call_vrom_long_func(vmi_rom, get_reloc, \
583 VMI_CALL_##vmicall); \
584 if (rel->type == VMI_RELOCATION_CALL_REL) \
585 opname = (void *)rel->eip; \
586 else if (rel->type == VMI_RELOCATION_NOP) \
587 opname = (void *)vmi_nop; \
588 else if (rel->type != VMI_RELOCATION_NONE) \
589 printk(KERN_WARNING "VMI: Unknown relocation " \
590 "type %d for " #vmicall"\n",\
591 rel->type); \
592} while (0)
593
594/*
595 * Helper macro for making the VMI paravirt-ops fill code readable.
596 * For cached operations which do not match the VMI ROM ABI and must
 597	 * go through a translation stub.  Ignore NOPs, since it is not clear
 598	 * a NOP VMI function corresponds to a NOP paravirt-op when the
599 * functions are not in 1-1 correspondence.
600 */
601#define para_wrap(opname, wrapper, cache, vmicall) \
602do { \
603 reloc = call_vrom_long_func(vmi_rom, get_reloc, \
604 VMI_CALL_##vmicall); \
605 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
606 if (rel->type == VMI_RELOCATION_CALL_REL) { \
607 opname = wrapper; \
608 vmi_ops.cache = (void *)rel->eip; \
609 } \
610} while (0)
611
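/*
 * As a concrete expansion, para_fill(pv_cpu_ops.clts, CLTS) looks up
 * VMI_CALL_CLTS in the ROM: a CALL relocation installs the ROM entry
 * point as pv_cpu_ops.clts, a NOP relocation installs vmi_nop, an
 * unknown relocation type triggers the warning, and
 * VMI_RELOCATION_NONE leaves the native implementation untouched.
 */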
612/*
613 * Activate the VMI interface and switch into paravirtualized mode
614 */
615static inline int __init activate_vmi(void)
616{
617 short kernel_cs;
618 u64 reloc;
619 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
620
621 /*
622 * Prevent page tables from being allocated in highmem, even if
623 * CONFIG_HIGHPTE is enabled.
624 */
625 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
626
627 if (call_vrom_func(vmi_rom, vmi_init) != 0) {
628 printk(KERN_ERR "VMI ROM failed to initialize!");
629 return 0;
630 }
631 savesegment(cs, kernel_cs);
632
633 pv_info.paravirt_enabled = 1;
634 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
635 pv_info.name = "vmi [deprecated]";
636
637 pv_init_ops.patch = vmi_patch;
638
639 /*
640 * Many of these operations are ABI compatible with VMI.
641 * This means we can fill in the paravirt-ops with direct
642 * pointers into the VMI ROM. If the calling convention for
643 * these operations changes, this code needs to be updated.
644 *
645 * Exceptions
646 * CPUID paravirt-op uses pointers, not the native ISA
647 * halt has no VMI equivalent; all VMI halts are "safe"
648 * no MSR support yet - just trap and emulate. VMI uses the
649 * same ABI as the native ISA, but Linux wants exceptions
650 * from bogus MSR read / write handled
651 * rdpmc is not yet used in Linux
652 */
653
654 /* CPUID is special, so very special it gets wrapped like a present */
655 para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);
656
657 para_fill(pv_cpu_ops.clts, CLTS);
658 para_fill(pv_cpu_ops.get_debugreg, GetDR);
659 para_fill(pv_cpu_ops.set_debugreg, SetDR);
660 para_fill(pv_cpu_ops.read_cr0, GetCR0);
661 para_fill(pv_mmu_ops.read_cr2, GetCR2);
662 para_fill(pv_mmu_ops.read_cr3, GetCR3);
663 para_fill(pv_cpu_ops.read_cr4, GetCR4);
664 para_fill(pv_cpu_ops.write_cr0, SetCR0);
665 para_fill(pv_mmu_ops.write_cr2, SetCR2);
666 para_fill(pv_mmu_ops.write_cr3, SetCR3);
667 para_fill(pv_cpu_ops.write_cr4, SetCR4);
668
669 para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
670 para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
671 para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
672 para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
673
674 para_fill(pv_cpu_ops.wbinvd, WBINVD);
675 para_fill(pv_cpu_ops.read_tsc, RDTSC);
676
677 /* The following we emulate with trap and emulate for now */
678 /* paravirt_ops.read_msr = vmi_rdmsr */
679 /* paravirt_ops.write_msr = vmi_wrmsr */
680 /* paravirt_ops.rdpmc = vmi_rdpmc */
681
682 /* TR interface doesn't pass TR value, wrap */
683 para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);
684
685 /* LDT is special, too */
686 para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
687
688 para_fill(pv_cpu_ops.load_gdt, SetGDT);
689 para_fill(pv_cpu_ops.load_idt, SetIDT);
690 para_fill(pv_cpu_ops.store_gdt, GetGDT);
691 para_fill(pv_cpu_ops.store_idt, GetIDT);
692 para_fill(pv_cpu_ops.store_tr, GetTR);
693 pv_cpu_ops.load_tls = vmi_load_tls;
694 para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry,
695 write_ldt_entry, WriteLDTEntry);
696 para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry,
697 write_gdt_entry, WriteGDTEntry);
698 para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
699 write_idt_entry, WriteIDTEntry);
700 para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
701 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
702 para_fill(pv_cpu_ops.io_delay, IODelay);
703
704 para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
705 set_lazy_mode, SetLazyMode);
706 para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
707 set_lazy_mode, SetLazyMode);
708
709 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
710 set_lazy_mode, SetLazyMode);
711 para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
712 set_lazy_mode, SetLazyMode);
713
714 /* user and kernel flush are just handled with different flags to FlushTLB */
715 para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
716 para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
717 para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);
718
719 /*
720 * Until a standard flag format can be agreed on, we need to
721 * implement these as wrappers in Linux. Get the VMI ROM
722 * function pointers for the two backend calls.
723 */
724#ifdef CONFIG_X86_PAE
725 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong);
726 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong);
727#else
728 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
729 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
730#endif
731
732 if (vmi_ops.set_pte) {
733 pv_mmu_ops.set_pte = vmi_set_pte;
734 pv_mmu_ops.set_pte_at = vmi_set_pte_at;
735 pv_mmu_ops.set_pmd = vmi_set_pmd;
736#ifdef CONFIG_X86_PAE
737 pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
738 pv_mmu_ops.set_pud = vmi_set_pud;
739 pv_mmu_ops.pte_clear = vmi_pte_clear;
740 pv_mmu_ops.pmd_clear = vmi_pmd_clear;
741#endif
742 }
743
744 if (vmi_ops.update_pte) {
745 pv_mmu_ops.pte_update = vmi_update_pte;
746 pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
747 }
748
749 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
750 if (vmi_ops.allocate_page) {
751 pv_mmu_ops.alloc_pte = vmi_allocate_pte;
752 pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
753 pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
754 }
755
756 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
757 if (vmi_ops.release_page) {
758 pv_mmu_ops.release_pte = vmi_release_pte;
759 pv_mmu_ops.release_pmd = vmi_release_pmd;
760 pv_mmu_ops.pgd_free = vmi_pgd_free;
761 }
762
763 /* Set linear is needed in all cases */
764 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
765
766 /*
767 * These MUST always be patched. Don't support indirect jumps
768 * through these operations, as the VMI interface may use either
769 * a jump or a call to get to these operations, depending on
770 * the backend. They are performance critical anyway, so requiring
771 * a patch is not a big problem.
772 */
773 pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
774 pv_cpu_ops.iret = (void *)0xbadbab0;
775
776#ifdef CONFIG_SMP
777 para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
778#endif
779
780#ifdef CONFIG_X86_LOCAL_APIC
781 para_fill(apic->read, APICRead);
782 para_fill(apic->write, APICWrite);
783#endif
784
785 /*
786 * Check for VMI timer functionality by probing for a cycle frequency method
787 */
788 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
789 if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
790 vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
791 vmi_timer_ops.get_cycle_counter =
792 vmi_get_function(VMI_CALL_GetCycleCounter);
793 vmi_timer_ops.get_wallclock =
794 vmi_get_function(VMI_CALL_GetWallclockTime);
795 vmi_timer_ops.wallclock_updated =
796 vmi_get_function(VMI_CALL_WallclockUpdated);
797 vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
798 vmi_timer_ops.cancel_alarm =
799 vmi_get_function(VMI_CALL_CancelAlarm);
800 x86_init.timers.timer_init = vmi_time_init;
801#ifdef CONFIG_X86_LOCAL_APIC
802 x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init;
803 x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init;
804#endif
805 pv_time_ops.sched_clock = vmi_sched_clock;
806 x86_platform.calibrate_tsc = vmi_tsc_khz;
807 x86_platform.get_wallclock = vmi_get_wallclock;
808 x86_platform.set_wallclock = vmi_set_wallclock;
809
810 /* We have true wallclock functions; disable CMOS clock sync */
811 no_sync_cmos_clock = 1;
812 } else {
813 disable_noidle = 1;
814 disable_vmi_timer = 1;
815 }
816
817 para_fill(pv_irq_ops.safe_halt, Halt);
818
819 /*
820 * Alternative instruction rewriting doesn't happen soon enough
821 * to convert VMI_IRET to a call instead of a jump; so we have
822 * to do this before IRQs get reenabled. Fortunately, it is
823 * idempotent.
824 */
825 apply_paravirt(__parainstructions, __parainstructions_end);
826
827 vmi_bringup();
828
829 return 1;
830}
831
832#undef para_fill
833
834void __init vmi_init(void)
835{
836 if (!vmi_rom)
837 probe_vmi_rom();
838 else
839 check_vmi_rom(vmi_rom);
840
 841	/* In case probing for or validating the ROM failed, bail */
842 if (!vmi_rom)
843 return;
844
845 reserve_top_address(-vmi_rom->virtual_top);
846
847#ifdef CONFIG_X86_IO_APIC
848 /* This is virtual hardware; timer routing is wired correctly */
849 no_timer_check = 1;
850#endif
851}
852
853void __init vmi_activate(void)
854{
855 unsigned long flags;
856
857 if (!vmi_rom)
858 return;
859
860 local_irq_save(flags);
861 activate_vmi();
862 local_irq_restore(flags & X86_EFLAGS_IF);
863}
864
865static int __init parse_vmi(char *arg)
866{
867 if (!arg)
868 return -EINVAL;
869
870 if (!strcmp(arg, "disable_pge")) {
871 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
872 disable_pge = 1;
873 } else if (!strcmp(arg, "disable_pse")) {
874 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
875 disable_pse = 1;
876 } else if (!strcmp(arg, "disable_sep")) {
877 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
878 disable_sep = 1;
879 } else if (!strcmp(arg, "disable_tsc")) {
880 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC);
881 disable_tsc = 1;
882 } else if (!strcmp(arg, "disable_mtrr")) {
883 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR);
884 disable_mtrr = 1;
885 } else if (!strcmp(arg, "disable_timer")) {
886 disable_vmi_timer = 1;
887 disable_noidle = 1;
888 } else if (!strcmp(arg, "disable_noidle"))
889 disable_noidle = 1;
890 return 0;
891}
892
893early_param("vmi", parse_vmi);
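/*
 * Usage example: booting with "vmi=disable_tsc" clears X86_FEATURE_TSC
 * in boot_cpu_data and sets disable_tsc, which vmi_cpuid() above then
 * masks out of the guest-visible feature bits.
 */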
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
deleted file mode 100644
index 5e1ff66ecd73..000000000000
--- a/arch/x86/kernel/vmiclock_32.c
+++ /dev/null
@@ -1,317 +0,0 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2007, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 */
22
23#include <linux/smp.h>
24#include <linux/interrupt.h>
25#include <linux/cpumask.h>
26#include <linux/clocksource.h>
27#include <linux/clockchips.h>
28
29#include <asm/vmi.h>
30#include <asm/vmi_time.h>
31#include <asm/apicdef.h>
32#include <asm/apic.h>
33#include <asm/timer.h>
34#include <asm/i8253.h>
35#include <asm/irq_vectors.h>
36
37#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
38#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
39
40static DEFINE_PER_CPU(struct clock_event_device, local_events);
41
42static inline u32 vmi_counter(u32 flags)
43{
44 /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding
45 * cycle counter. */
46 return flags & VMI_ALARM_COUNTER_MASK;
47}
48
49/* paravirt_ops.get_wallclock = vmi_get_wallclock */
50unsigned long vmi_get_wallclock(void)
51{
52 unsigned long long wallclock;
53 wallclock = vmi_timer_ops.get_wallclock(); // nsec
54 (void)do_div(wallclock, 1000000000); // sec
55
56 return wallclock;
57}
58
59/* paravirt_ops.set_wallclock = vmi_set_wallclock */
60int vmi_set_wallclock(unsigned long now)
61{
62 return 0;
63}
64
65/* paravirt_ops.sched_clock = vmi_sched_clock */
66unsigned long long vmi_sched_clock(void)
67{
68 return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
69}
70
71/* x86_platform.calibrate_tsc = vmi_tsc_khz */
72unsigned long vmi_tsc_khz(void)
73{
74 unsigned long long khz;
75 khz = vmi_timer_ops.get_cycle_frequency();
76 (void)do_div(khz, 1000);
77 return khz;
78}
79
80static inline unsigned int vmi_get_timer_vector(void)
81{
82 return IRQ0_VECTOR;
83}
84
85/** vmi clockchip */
86#ifdef CONFIG_X86_LOCAL_APIC
87static unsigned int startup_timer_irq(unsigned int irq)
88{
89 unsigned long val = apic_read(APIC_LVTT);
90 apic_write(APIC_LVTT, vmi_get_timer_vector());
91
92 return (val & APIC_SEND_PENDING);
93}
94
95static void mask_timer_irq(unsigned int irq)
96{
97 unsigned long val = apic_read(APIC_LVTT);
98 apic_write(APIC_LVTT, val | APIC_LVT_MASKED);
99}
100
101static void unmask_timer_irq(unsigned int irq)
102{
103 unsigned long val = apic_read(APIC_LVTT);
104 apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED);
105}
106
107static void ack_timer_irq(unsigned int irq)
108{
109 ack_APIC_irq();
110}
111
112static struct irq_chip vmi_chip __read_mostly = {
113 .name = "VMI-LOCAL",
114 .startup = startup_timer_irq,
115 .mask = mask_timer_irq,
116 .unmask = unmask_timer_irq,
117 .ack = ack_timer_irq
118};
119#endif
120
121/** vmi clockevent */
122#define VMI_ALARM_WIRED_IRQ0 0x00000000
123#define VMI_ALARM_WIRED_LVTT 0x00010000
124static int vmi_wiring = VMI_ALARM_WIRED_IRQ0;
125
126static inline int vmi_get_alarm_wiring(void)
127{
128 return vmi_wiring;
129}
130
131static void vmi_timer_set_mode(enum clock_event_mode mode,
132 struct clock_event_device *evt)
133{
134 cycle_t now, cycles_per_hz;
135 BUG_ON(!irqs_disabled());
136
137 switch (mode) {
138 case CLOCK_EVT_MODE_ONESHOT:
139 case CLOCK_EVT_MODE_RESUME:
140 break;
141 case CLOCK_EVT_MODE_PERIODIC:
142 cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
143 (void)do_div(cycles_per_hz, HZ);
144 now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC));
145 vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz);
146 break;
147 case CLOCK_EVT_MODE_UNUSED:
148 case CLOCK_EVT_MODE_SHUTDOWN:
149 switch (evt->mode) {
150 case CLOCK_EVT_MODE_ONESHOT:
151 vmi_timer_ops.cancel_alarm(VMI_ONESHOT);
152 break;
153 case CLOCK_EVT_MODE_PERIODIC:
154 vmi_timer_ops.cancel_alarm(VMI_PERIODIC);
155 break;
156 default:
157 break;
158 }
159 break;
160 default:
161 break;
162 }
163}
164
165static int vmi_timer_next_event(unsigned long delta,
166 struct clock_event_device *evt)
167{
168 /* Unfortunately, set_next_event interface only passes relative
 169	 * expiry, but we want absolute expiry.  It'd be better if we
 170	 * were passed an absolute expiry, since a bunch of time may
171 * have been stolen between the time the delta is computed and
172 * when we set the alarm below. */
173 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
174
175 BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
176 vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0);
177 return 0;
178}
179
180static struct clock_event_device vmi_clockevent = {
181 .name = "vmi-timer",
182 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
183 .shift = 22,
184 .set_mode = vmi_timer_set_mode,
185 .set_next_event = vmi_timer_next_event,
186 .rating = 1000,
187 .irq = 0,
188};
189
190static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
191{
192 struct clock_event_device *evt = &__get_cpu_var(local_events);
193 evt->event_handler(evt);
194 return IRQ_HANDLED;
195}
196
197static struct irqaction vmi_clock_action = {
198 .name = "vmi-timer",
199 .handler = vmi_timer_interrupt,
200 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
201};
202
203static void __devinit vmi_time_init_clockevent(void)
204{
205 cycle_t cycles_per_msec;
206 struct clock_event_device *evt;
207
208 int cpu = smp_processor_id();
209 evt = &__get_cpu_var(local_events);
210
211 /* Use cycles_per_msec since div_sc params are 32-bits. */
212 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
213 (void)do_div(cycles_per_msec, 1000);
214
215 memcpy(evt, &vmi_clockevent, sizeof(*evt));
216 /* Must pick .shift such that .mult fits in 32-bits. Choosing
 217	 * .shift to be 22 allows up to 2^(32-22) cycles per nanosecond
218 * before overflow. */
219 evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift);
220 /* Upper bound is clockevent's use of ulong for cycle deltas. */
221 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
222 evt->min_delta_ns = clockevent_delta2ns(1, evt);
223 evt->cpumask = cpumask_of(cpu);
224
225 printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n",
226 evt->name, evt->mult, evt->shift);
227 clockevents_register_device(evt);
228}
229
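/*
 * Worked example for a hypothetical 2 GHz guest: cycles_per_msec is
 * 2,000,000, so with .shift = 22 div_sc() yields
 * mult = (2000000 << 22) / NSEC_PER_MSEC = 8388608, and a programmed
 * delta of 1,000,000 ns converts to (1000000 * 8388608) >> 22 =
 * 2,000,000 cycles, as expected for 2 GHz.
 */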
230void __init vmi_time_init(void)
231{
232 unsigned int cpu;
 233	/* Disable PIT: BIOSes start PIT CH0 with 18.2 Hz periodic interrupts. */
234 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
235
236 vmi_time_init_clockevent();
237 setup_irq(0, &vmi_clock_action);
238 for_each_possible_cpu(cpu)
239 per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
240}
241
242#ifdef CONFIG_X86_LOCAL_APIC
243void __devinit vmi_time_bsp_init(void)
244{
245 /*
246 * On APIC systems, we want local timers to fire on each cpu. We do
247 * this by programming LVTT to deliver timer events to the IRQ handler
248 * for IRQ-0, since we can't re-use the APIC local timer handler
249 * without interfering with that code.
250 */
251 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
252 local_irq_disable();
253#ifdef CONFIG_SMP
254 /*
255 * XXX handle_percpu_irq only defined for SMP; we need to switch over
256 * to using it, since this is a local interrupt, which each CPU must
257 * handle individually without locking out or dropping simultaneous
258 * local timers on other CPUs. We also don't want to trigger the
259 * quirk workaround code for interrupts which gets invoked from
260 * handle_percpu_irq via eoi, so we use our own IRQ chip.
261 */
262 set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt");
263#else
264 set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt");
265#endif
266 vmi_wiring = VMI_ALARM_WIRED_LVTT;
267 apic_write(APIC_LVTT, vmi_get_timer_vector());
268 local_irq_enable();
269 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
270}
271
272void __devinit vmi_time_ap_init(void)
273{
274 vmi_time_init_clockevent();
275 apic_write(APIC_LVTT, vmi_get_timer_vector());
276}
277#endif
278
279/** vmi clocksource */
280static struct clocksource clocksource_vmi;
281
282static cycle_t read_real_cycles(struct clocksource *cs)
283{
284 cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
285 return max(ret, clocksource_vmi.cycle_last);
286}
287
288static struct clocksource clocksource_vmi = {
289 .name = "vmi-timer",
290 .rating = 450,
291 .read = read_real_cycles,
292 .mask = CLOCKSOURCE_MASK(64),
293 .mult = 0, /* to be set */
294 .shift = 22,
295 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
296};
297
298static int __init init_vmi_clocksource(void)
299{
300 cycle_t cycles_per_msec;
301
302 if (!vmi_timer_ops.get_cycle_frequency)
303 return 0;
304 /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */
305 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
306 (void)do_div(cycles_per_msec, 1000);
307
308 /* Note that clocksource.{mult, shift} converts in the opposite direction
309 * as clockevents. */
310 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
311 clocksource_vmi.shift);
312
313 printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec);
314 return clocksource_register(&clocksource_vmi);
315
316}
317module_init(init_vmi_clocksource);
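/*
 * With the same hypothetical 2 GHz cycle frequency as above,
 * clocksource_khz2mult() computes roughly (1000000 << shift) / khz =
 * (1000000 << 22) / 2000000 = 2097152, so 2,000,000 cycles read from
 * the counter convert back to (2000000 * 2097152) >> 22 = 1,000,000 ns,
 * the inverse of the clockevent direction noted above.
 */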
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d0bb52296fa3..38e2b67807e1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -242,6 +242,12 @@ SECTIONS
242 __x86_cpu_dev_end = .; 242 __x86_cpu_dev_end = .;
243 } 243 }
244 244
245 /*
246 * start address and size of operations which during runtime
247 * can be patched with virtualization friendly instructions or
248 * baremetal native ones. Think page table operations.
249 * Details in paravirt_types.h
250 */
245 . = ALIGN(8); 251 . = ALIGN(8);
246 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 252 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
247 __parainstructions = .; 253 __parainstructions = .;
@@ -249,6 +255,11 @@ SECTIONS
249 __parainstructions_end = .; 255 __parainstructions_end = .;
250 } 256 }
251 257
258 /*
259 * struct alt_inst entries. From the header (alternative.h):
260 * "Alternative instructions for different CPU types or capabilities"
261 * Think locking instructions on spinlocks.
262 */
252 . = ALIGN(8); 263 . = ALIGN(8);
253 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { 264 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
254 __alt_instructions = .; 265 __alt_instructions = .;
@@ -256,11 +267,28 @@ SECTIONS
256 __alt_instructions_end = .; 267 __alt_instructions_end = .;
257 } 268 }
258 269
270 /*
271 * And here are the replacement instructions. The linker sticks
272 * them as binary blobs. The .altinstructions has enough data to
273 * get the address and the length of them to patch the kernel safely.
274 */
259 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { 275 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
260 *(.altinstr_replacement) 276 *(.altinstr_replacement)
261 } 277 }
262 278
263 /* 279 /*
280 * struct iommu_table_entry entries are injected in this section.
281 * It is an array of IOMMUs which during run time gets sorted depending
282 * on its dependency order. After rootfs_initcall is complete
283 * this section can be safely removed.
284 */
285 .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) {
286 __iommu_table = .;
287 *(.iommu_table)
288 __iommu_table_end = .;
289 }
290 . = ALIGN(8);
291 /*
264 * .exit.text is discard at runtime, not link time, to deal with 292 * .exit.text is discard at runtime, not link time, to deal with
265 * references from .altinstructions and .eh_frame 293 * references from .altinstructions and .eh_frame
266 */ 294 */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 77d8c0f4817d..22b06f7660f4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1056,14 +1056,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
1056 1056
1057 vcpu->arch.apic = apic; 1057 vcpu->arch.apic = apic;
1058 1058
1059 apic->regs_page = alloc_page(GFP_KERNEL); 1059 apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
1060 if (apic->regs_page == NULL) { 1060 if (apic->regs_page == NULL) {
1061 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 1061 printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
1062 vcpu->vcpu_id); 1062 vcpu->vcpu_id);
1063 goto nomem_free_apic; 1063 goto nomem_free_apic;
1064 } 1064 }
1065 apic->regs = page_address(apic->regs_page); 1065 apic->regs = page_address(apic->regs_page);
1066 memset(apic->regs, 0, PAGE_SIZE);
1067 apic->vcpu = vcpu; 1066 apic->vcpu = vcpu;
1068 1067
1069 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 1068 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index bc5b9b8d4a33..8a3f9f64f86f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -766,7 +766,6 @@ static void init_vmcb(struct vcpu_svm *svm)
766 766
767 control->iopm_base_pa = iopm_base; 767 control->iopm_base_pa = iopm_base;
768 control->msrpm_base_pa = __pa(svm->msrpm); 768 control->msrpm_base_pa = __pa(svm->msrpm);
769 control->tsc_offset = 0;
770 control->int_ctl = V_INTR_MASKING_MASK; 769 control->int_ctl = V_INTR_MASKING_MASK;
771 770
772 init_seg(&save->es); 771 init_seg(&save->es);
@@ -902,6 +901,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
902 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 901 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
903 svm->asid_generation = 0; 902 svm->asid_generation = 0;
904 init_vmcb(svm); 903 init_vmcb(svm);
904 svm->vmcb->control.tsc_offset = 0-native_read_tsc();
905 905
906 err = fx_init(&svm->vcpu); 906 err = fx_init(&svm->vcpu);
907 if (err) 907 if (err)
@@ -3163,8 +3163,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3163 sync_lapic_to_cr8(vcpu); 3163 sync_lapic_to_cr8(vcpu);
3164 3164
3165 save_host_msrs(vcpu); 3165 save_host_msrs(vcpu);
3166 fs_selector = kvm_read_fs(); 3166 savesegment(fs, fs_selector);
3167 gs_selector = kvm_read_gs(); 3167 savesegment(gs, gs_selector);
3168 ldt_selector = kvm_read_ldt(); 3168 ldt_selector = kvm_read_ldt();
3169 svm->vmcb->save.cr2 = vcpu->arch.cr2; 3169 svm->vmcb->save.cr2 = vcpu->arch.cr2;
3170 /* required for live migration with NPT */ 3170 /* required for live migration with NPT */
@@ -3251,10 +3251,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3251 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 3251 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3252 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 3252 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3253 3253
3254 kvm_load_fs(fs_selector);
3255 kvm_load_gs(gs_selector);
3256 kvm_load_ldt(ldt_selector);
3257 load_host_msrs(vcpu); 3254 load_host_msrs(vcpu);
3255 loadsegment(fs, fs_selector);
3256#ifdef CONFIG_X86_64
3257 load_gs_index(gs_selector);
3258 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
3259#else
3260 loadsegment(gs, gs_selector);
3261#endif
3262 kvm_load_ldt(ldt_selector);
3258 3263
3259 reload_tss(vcpu); 3264 reload_tss(vcpu);
3260 3265
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 49b25eee25ac..7bddfab12013 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -803,7 +803,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
803 */ 803 */
804 vmx->host_state.ldt_sel = kvm_read_ldt(); 804 vmx->host_state.ldt_sel = kvm_read_ldt();
805 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; 805 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
806 vmx->host_state.fs_sel = kvm_read_fs(); 806 savesegment(fs, vmx->host_state.fs_sel);
807 if (!(vmx->host_state.fs_sel & 7)) { 807 if (!(vmx->host_state.fs_sel & 7)) {
808 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); 808 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
809 vmx->host_state.fs_reload_needed = 0; 809 vmx->host_state.fs_reload_needed = 0;
@@ -811,7 +811,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
811 vmcs_write16(HOST_FS_SELECTOR, 0); 811 vmcs_write16(HOST_FS_SELECTOR, 0);
812 vmx->host_state.fs_reload_needed = 1; 812 vmx->host_state.fs_reload_needed = 1;
813 } 813 }
814 vmx->host_state.gs_sel = kvm_read_gs(); 814 savesegment(gs, vmx->host_state.gs_sel);
815 if (!(vmx->host_state.gs_sel & 7)) 815 if (!(vmx->host_state.gs_sel & 7))
816 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); 816 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
817 else { 817 else {
@@ -841,27 +841,21 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
841 841
842static void __vmx_load_host_state(struct vcpu_vmx *vmx) 842static void __vmx_load_host_state(struct vcpu_vmx *vmx)
843{ 843{
844 unsigned long flags;
845
846 if (!vmx->host_state.loaded) 844 if (!vmx->host_state.loaded)
847 return; 845 return;
848 846
849 ++vmx->vcpu.stat.host_state_reload; 847 ++vmx->vcpu.stat.host_state_reload;
850 vmx->host_state.loaded = 0; 848 vmx->host_state.loaded = 0;
851 if (vmx->host_state.fs_reload_needed) 849 if (vmx->host_state.fs_reload_needed)
852 kvm_load_fs(vmx->host_state.fs_sel); 850 loadsegment(fs, vmx->host_state.fs_sel);
853 if (vmx->host_state.gs_ldt_reload_needed) { 851 if (vmx->host_state.gs_ldt_reload_needed) {
854 kvm_load_ldt(vmx->host_state.ldt_sel); 852 kvm_load_ldt(vmx->host_state.ldt_sel);
855 /*
856 * If we have to reload gs, we must take care to
857 * preserve our gs base.
858 */
859 local_irq_save(flags);
860 kvm_load_gs(vmx->host_state.gs_sel);
861#ifdef CONFIG_X86_64 853#ifdef CONFIG_X86_64
862 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); 854 load_gs_index(vmx->host_state.gs_sel);
855 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
856#else
857 loadsegment(gs, vmx->host_state.gs_sel);
863#endif 858#endif
864 local_irq_restore(flags);
865 } 859 }
866 reload_tss(); 860 reload_tss();
867#ifdef CONFIG_X86_64 861#ifdef CONFIG_X86_64
@@ -2589,8 +2583,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2589 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 2583 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
2590 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2584 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
2591 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2585 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
2592 vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */ 2586 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
2593 vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */ 2587 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
2594 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2588 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
2595#ifdef CONFIG_X86_64 2589#ifdef CONFIG_X86_64
2596 rdmsrl(MSR_FS_BASE, a); 2590 rdmsrl(MSR_FS_BASE, a);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3a09c625d526..6c2ecf0a806d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1991,13 +1991,14 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1992 0 /* Reserved, DCA */ | F(XMM4_1) | 1992 0 /* Reserved, DCA */ | F(XMM4_1) |
1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
1994 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); 1994 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
1995 F(F16C);
1995 /* cpuid 0x80000001.ecx */ 1996 /* cpuid 0x80000001.ecx */
1996 const u32 kvm_supported_word6_x86_features = 1997 const u32 kvm_supported_word6_x86_features =
1997 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | 1998 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
1998 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | 1999 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
1999 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | 2000 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
2000 0 /* SKINIT */ | 0 /* WDT */; 2001 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
2001 2002
2002 /* all calls to cpuid_count() should be made on the same cpu */ 2003 /* all calls to cpuid_count() should be made on the same cpu */
2003 get_cpu(); 2004 get_cpu();
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9d5f55848455..73b1e1a1f489 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -791,22 +791,22 @@ static void lguest_flush_tlb_kernel(void)
791 * simple as setting a bit. We don't actually "ack" interrupts as such, we 791 * simple as setting a bit. We don't actually "ack" interrupts as such, we
792 * just mask and unmask them. I wonder if we should be cleverer? 792 * just mask and unmask them. I wonder if we should be cleverer?
793 */ 793 */
794static void disable_lguest_irq(unsigned int irq) 794static void disable_lguest_irq(struct irq_data *data)
795{ 795{
796 set_bit(irq, lguest_data.blocked_interrupts); 796 set_bit(data->irq, lguest_data.blocked_interrupts);
797} 797}
798 798
799static void enable_lguest_irq(unsigned int irq) 799static void enable_lguest_irq(struct irq_data *data)
800{ 800{
801 clear_bit(irq, lguest_data.blocked_interrupts); 801 clear_bit(data->irq, lguest_data.blocked_interrupts);
802} 802}
803 803
804/* This structure describes the lguest IRQ controller. */ 804/* This structure describes the lguest IRQ controller. */
805static struct irq_chip lguest_irq_controller = { 805static struct irq_chip lguest_irq_controller = {
806 .name = "lguest", 806 .name = "lguest",
807 .mask = disable_lguest_irq, 807 .irq_mask = disable_lguest_irq,
808 .mask_ack = disable_lguest_irq, 808 .irq_mask_ack = disable_lguest_irq,
809 .unmask = enable_lguest_irq, 809 .irq_unmask = enable_lguest_irq,
810}; 810};
811 811
812/* 812/*
@@ -838,12 +838,12 @@ static void __init lguest_init_IRQ(void)
838 * rather than set them in lguest_init_IRQ we are called here every time an 838 * rather than set them in lguest_init_IRQ we are called here every time an
839 * lguest device needs an interrupt. 839 * lguest device needs an interrupt.
840 * 840 *
841 * FIXME: irq_to_desc_alloc_node() can fail due to lack of memory, we should 841 * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should
842 * pass that up! 842 * pass that up!
843 */ 843 */
844void lguest_setup_irq(unsigned int irq) 844void lguest_setup_irq(unsigned int irq)
845{ 845{
846 irq_to_desc_alloc_node(irq, 0); 846 irq_alloc_desc_at(irq, 0);
847 set_irq_chip_and_handler_name(irq, &lguest_irq_controller, 847 set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
848 handle_level_irq, "level"); 848 handle_level_irq, "level");
849} 849}
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 5415a9d06f53..b908a59eccf5 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -22,22 +22,187 @@ EXPORT_SYMBOL(memset);
22 22
23void *memmove(void *dest, const void *src, size_t n) 23void *memmove(void *dest, const void *src, size_t n)
24{ 24{
25 int d0, d1, d2; 25 int d0,d1,d2,d3,d4,d5;
26 26 char *ret = dest;
27 if (dest < src) { 27
28 memcpy(dest, src, n); 28 __asm__ __volatile__(
29 } else { 29 /* Handle more 16bytes in loop */
30 __asm__ __volatile__( 30 "cmp $0x10, %0\n\t"
31 "std\n\t" 31 "jb 1f\n\t"
32 "rep\n\t" 32
33 "movsb\n\t" 33 /* Decide forward/backward copy mode */
34 "cld" 34 "cmp %2, %1\n\t"
35 : "=&c" (d0), "=&S" (d1), "=&D" (d2) 35 "jb 2f\n\t"
36 :"0" (n), 36
37 "1" (n-1+src), 37 /*
38 "2" (n-1+dest) 38 * movs instruction have many startup latency
39 :"memory"); 39 * so we handle small size by general register.
40 } 40 */
41 return dest; 41 "cmp $680, %0\n\t"
42 "jb 3f\n\t"
43 /*
44 * movs instruction is only good for aligned case.
45 */
46 "mov %1, %3\n\t"
47 "xor %2, %3\n\t"
48 "and $0xff, %3\n\t"
49 "jz 4f\n\t"
50 "3:\n\t"
51 "sub $0x10, %0\n\t"
52
53 /*
54 * We gobble 16byts forward in each loop.
55 */
56 "3:\n\t"
57 "sub $0x10, %0\n\t"
58 "mov 0*4(%1), %3\n\t"
59 "mov 1*4(%1), %4\n\t"
60 "mov %3, 0*4(%2)\n\t"
61 "mov %4, 1*4(%2)\n\t"
62 "mov 2*4(%1), %3\n\t"
63 "mov 3*4(%1), %4\n\t"
64 "mov %3, 2*4(%2)\n\t"
65 "mov %4, 3*4(%2)\n\t"
66 "lea 0x10(%1), %1\n\t"
67 "lea 0x10(%2), %2\n\t"
68 "jae 3b\n\t"
69 "add $0x10, %0\n\t"
70 "jmp 1f\n\t"
71
72 /*
73 * Handle data forward by movs.
74 */
75 ".p2align 4\n\t"
76 "4:\n\t"
77 "mov -4(%1, %0), %3\n\t"
78 "lea -4(%2, %0), %4\n\t"
79 "shr $2, %0\n\t"
80 "rep movsl\n\t"
81 "mov %3, (%4)\n\t"
82 "jmp 11f\n\t"
83 /*
84 * Handle data backward by movs.
85 */
86 ".p2align 4\n\t"
87 "6:\n\t"
88 "mov (%1), %3\n\t"
89 "mov %2, %4\n\t"
90 "lea -4(%1, %0), %1\n\t"
91 "lea -4(%2, %0), %2\n\t"
92 "shr $2, %0\n\t"
93 "std\n\t"
94 "rep movsl\n\t"
95 "mov %3,(%4)\n\t"
96 "cld\n\t"
97 "jmp 11f\n\t"
98
99 /*
100 * Start to prepare for backward copy.
101 */
102 ".p2align 4\n\t"
103 "2:\n\t"
104 "cmp $680, %0\n\t"
105 "jb 5f\n\t"
106 "mov %1, %3\n\t"
107 "xor %2, %3\n\t"
108 "and $0xff, %3\n\t"
109 "jz 6b\n\t"
110
111 /*
112 * Calculate copy position to tail.
113 */
114 "5:\n\t"
115 "add %0, %1\n\t"
116 "add %0, %2\n\t"
117 "sub $0x10, %0\n\t"
118
119 /*
120 * We gobble 16byts backward in each loop.
121 */
122 "7:\n\t"
123 "sub $0x10, %0\n\t"
124
125 "mov -1*4(%1), %3\n\t"
126 "mov -2*4(%1), %4\n\t"
127 "mov %3, -1*4(%2)\n\t"
128 "mov %4, -2*4(%2)\n\t"
129 "mov -3*4(%1), %3\n\t"
130 "mov -4*4(%1), %4\n\t"
131 "mov %3, -3*4(%2)\n\t"
132 "mov %4, -4*4(%2)\n\t"
133 "lea -0x10(%1), %1\n\t"
134 "lea -0x10(%2), %2\n\t"
135 "jae 7b\n\t"
136 /*
137 * Calculate copy position to head.
138 */
139 "add $0x10, %0\n\t"
140 "sub %0, %1\n\t"
141 "sub %0, %2\n\t"
142
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 ".p2align 4\n\t"
147 "1:\n\t"
148 "cmp $8, %0\n\t"
149 "jb 8f\n\t"
150 "mov 0*4(%1), %3\n\t"
151 "mov 1*4(%1), %4\n\t"
152 "mov -2*4(%1, %0), %5\n\t"
153 "mov -1*4(%1, %0), %1\n\t"
154
155 "mov %3, 0*4(%2)\n\t"
156 "mov %4, 1*4(%2)\n\t"
157 "mov %5, -2*4(%2, %0)\n\t"
158 "mov %1, -1*4(%2, %0)\n\t"
159 "jmp 11f\n\t"
160
161 /*
162 * Move data from 4 bytes to 7 bytes.
163 */
164 ".p2align 4\n\t"
165 "8:\n\t"
166 "cmp $4, %0\n\t"
167 "jb 9f\n\t"
168 "mov 0*4(%1), %3\n\t"
169 "mov -1*4(%1, %0), %4\n\t"
170 "mov %3, 0*4(%2)\n\t"
171 "mov %4, -1*4(%2, %0)\n\t"
172 "jmp 11f\n\t"
173
174 /*
175 * Move data from 2 bytes to 3 bytes.
176 */
177 ".p2align 4\n\t"
178 "9:\n\t"
179 "cmp $2, %0\n\t"
180 "jb 10f\n\t"
181 "movw 0*2(%1), %%dx\n\t"
182 "movw -1*2(%1, %0), %%bx\n\t"
183 "movw %%dx, 0*2(%2)\n\t"
184 "movw %%bx, -1*2(%2, %0)\n\t"
185 "jmp 11f\n\t"
186
187 /*
188 * Move data for 1 byte.
189 */
190 ".p2align 4\n\t"
191 "10:\n\t"
192 "cmp $1, %0\n\t"
193 "jb 11f\n\t"
194 "movb (%1), %%cl\n\t"
195 "movb %%cl, (%2)\n\t"
196 ".p2align 4\n\t"
197 "11:"
198 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
199 "=r" (d3),"=r" (d4), "=r"(d5)
200 :"0" (n),
201 "1" (src),
202 "2" (dest)
203 :"memory");
204
205 return ret;
206
42} 207}
43EXPORT_SYMBOL(memmove); 208EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bcbcd1e0f7d5..75ef61e35e38 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -40,84 +40,132 @@
40ENTRY(__memcpy) 40ENTRY(__memcpy)
41ENTRY(memcpy) 41ENTRY(memcpy)
42 CFI_STARTPROC 42 CFI_STARTPROC
43 movq %rdi, %rax
43 44
44 /* 45 /*
45 * Put the number of full 64-byte blocks into %ecx. 46 * Use 32bit CMP here to avoid long NOP padding.
46 * Tail portion is handled at the end:
47 */ 47 */
48 movq %rdi, %rax 48 cmp $0x20, %edx
49 movl %edx, %ecx 49 jb .Lhandle_tail
50 shrl $6, %ecx
51 jz .Lhandle_tail
52 50
53 .p2align 4
54.Lloop_64:
55 /* 51 /*
56 * We decrement the loop index here - and the zero-flag is 52 * We check whether memory false dependece could occur,
57 * checked at the end of the loop (instructions inbetween do 53 * then jump to corresponding copy mode.
58 * not change the zero flag):
59 */ 54 */
60 decl %ecx 55 cmp %dil, %sil
56 jl .Lcopy_backward
57 subl $0x20, %edx
58.Lcopy_forward_loop:
59 subq $0x20, %rdx
61 60
62 /* 61 /*
63 * Move in blocks of 4x16 bytes: 62 * Move in blocks of 4x8 bytes:
64 */ 63 */
65 movq 0*8(%rsi), %r11 64 movq 0*8(%rsi), %r8
66 movq 1*8(%rsi), %r8 65 movq 1*8(%rsi), %r9
67 movq %r11, 0*8(%rdi) 66 movq 2*8(%rsi), %r10
68 movq %r8, 1*8(%rdi) 67 movq 3*8(%rsi), %r11
69 68 leaq 4*8(%rsi), %rsi
70 movq 2*8(%rsi), %r9 69
71 movq 3*8(%rsi), %r10 70 movq %r8, 0*8(%rdi)
72 movq %r9, 2*8(%rdi) 71 movq %r9, 1*8(%rdi)
73 movq %r10, 3*8(%rdi) 72 movq %r10, 2*8(%rdi)
74 73 movq %r11, 3*8(%rdi)
75 movq 4*8(%rsi), %r11 74 leaq 4*8(%rdi), %rdi
76 movq 5*8(%rsi), %r8 75 jae .Lcopy_forward_loop
77 movq %r11, 4*8(%rdi) 76 addq $0x20, %rdx
78 movq %r8, 5*8(%rdi) 77 jmp .Lhandle_tail
79 78
80 movq 6*8(%rsi), %r9 79.Lcopy_backward:
81 movq 7*8(%rsi), %r10 80 /*
82 movq %r9, 6*8(%rdi) 81 * Calculate copy position to tail.
83 movq %r10, 7*8(%rdi) 82 */
84 83 addq %rdx, %rsi
85 leaq 64(%rsi), %rsi 84 addq %rdx, %rdi
86 leaq 64(%rdi), %rdi 85 subq $0x20, %rdx
87 86 /*
88 jnz .Lloop_64 87 * At most 3 ALU operations in one cycle,
88 * so append NOPS in the same 16bytes trunk.
89 */
90 .p2align 4
91.Lcopy_backward_loop:
92 subq $0x20, %rdx
93 movq -1*8(%rsi), %r8
94 movq -2*8(%rsi), %r9
95 movq -3*8(%rsi), %r10
96 movq -4*8(%rsi), %r11
97 leaq -4*8(%rsi), %rsi
98 movq %r8, -1*8(%rdi)
99 movq %r9, -2*8(%rdi)
100 movq %r10, -3*8(%rdi)
101 movq %r11, -4*8(%rdi)
102 leaq -4*8(%rdi), %rdi
103 jae .Lcopy_backward_loop
89 104
105 /*
106 * Calculate copy position to head.
107 */
108 addq $0x20, %rdx
109 subq %rdx, %rsi
110 subq %rdx, %rdi
90.Lhandle_tail: 111.Lhandle_tail:
91 movl %edx, %ecx 112 cmpq $16, %rdx
92 andl $63, %ecx 113 jb .Lless_16bytes
93 shrl $3, %ecx
94 jz .Lhandle_7
95 114
115 /*
116 * Move data from 16 bytes to 31 bytes.
117 */
118 movq 0*8(%rsi), %r8
119 movq 1*8(%rsi), %r9
120 movq -2*8(%rsi, %rdx), %r10
121 movq -1*8(%rsi, %rdx), %r11
122 movq %r8, 0*8(%rdi)
123 movq %r9, 1*8(%rdi)
124 movq %r10, -2*8(%rdi, %rdx)
125 movq %r11, -1*8(%rdi, %rdx)
126 retq
96 .p2align 4 127 .p2align 4
97.Lloop_8: 128.Lless_16bytes:
98 decl %ecx 129 cmpq $8, %rdx
99 movq (%rsi), %r8 130 jb .Lless_8bytes
100 movq %r8, (%rdi) 131 /*
101 leaq 8(%rdi), %rdi 132 * Move data from 8 bytes to 15 bytes.
102 leaq 8(%rsi), %rsi 133 */
103 jnz .Lloop_8 134 movq 0*8(%rsi), %r8
104 135 movq -1*8(%rsi, %rdx), %r9
105.Lhandle_7: 136 movq %r8, 0*8(%rdi)
106 movl %edx, %ecx 137 movq %r9, -1*8(%rdi, %rdx)
107 andl $7, %ecx 138 retq
108 jz .Lend 139 .p2align 4
140.Lless_8bytes:
141 cmpq $4, %rdx
142 jb .Lless_3bytes
109 143
144 /*
145 * Move data from 4 bytes to 7 bytes.
146 */
147 movl (%rsi), %ecx
148 movl -4(%rsi, %rdx), %r8d
149 movl %ecx, (%rdi)
150 movl %r8d, -4(%rdi, %rdx)
151 retq
110 .p2align 4 152 .p2align 4
153.Lless_3bytes:
154 cmpl $0, %edx
155 je .Lend
156 /*
157 * Move data from 1 bytes to 3 bytes.
158 */
111.Lloop_1: 159.Lloop_1:
112 movb (%rsi), %r8b 160 movb (%rsi), %r8b
113 movb %r8b, (%rdi) 161 movb %r8b, (%rdi)
114 incq %rdi 162 incq %rdi
115 incq %rsi 163 incq %rsi
116 decl %ecx 164 decl %edx
117 jnz .Lloop_1 165 jnz .Lloop_1
118 166
119.Lend: 167.Lend:
120 ret 168 retq
121 CFI_ENDPROC 169 CFI_ENDPROC
122ENDPROC(memcpy) 170ENDPROC(memcpy)
123ENDPROC(__memcpy) 171ENDPROC(__memcpy)
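The rewritten tail handling above copies the first and the last words of the range with stores that are allowed to overlap, rather than falling back to a byte loop. A small self-contained C rendering of the same trick for an 8-to-16-byte memcpy-style tail (illustrative only, not the kernel's code):

#include <stdint.h>
#include <string.h>

/* Copy 8 <= n <= 16 bytes between non-overlapping buffers; the two
 * 8-byte stores may overlap each other in the middle, which is
 * harmless for memcpy semantics. */
static void copy_tail_8_to_16(unsigned char *dst, const unsigned char *src, size_t n)
{
	uint64_t head, tail;

	memcpy(&head, src, 8);
	memcpy(&tail, src + n - 8, 8);
	memcpy(dst, &head, 8);
	memcpy(dst + n - 8, &tail, 8);
}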
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
index 0a33909bf122..6d0f0ec41b34 100644
--- a/arch/x86/lib/memmove_64.c
+++ b/arch/x86/lib/memmove_64.c
@@ -8,14 +8,185 @@
8#undef memmove 8#undef memmove
9void *memmove(void *dest, const void *src, size_t count) 9void *memmove(void *dest, const void *src, size_t count)
10{ 10{
11 if (dest < src) { 11 unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
12 return memcpy(dest, src, count); 12 char *ret;
13 } else { 13
14 char *p = dest + count; 14 __asm__ __volatile__(
15 const char *s = src + count; 15 /* Handle more 32bytes in loop */
16 while (count--) 16 "mov %2, %3\n\t"
17 *--p = *--s; 17 "cmp $0x20, %0\n\t"
18 } 18 "jb 1f\n\t"
19 return dest; 19
20 /* Decide forward/backward copy mode */
21 "cmp %2, %1\n\t"
22 "jb 2f\n\t"
23
24 /*
25 * movsq instruction have many startup latency
26 * so we handle small size by general register.
27 */
28 "cmp $680, %0\n\t"
29 "jb 3f\n\t"
30 /*
31 * movsq instruction is only good for aligned case.
32 */
33 "cmpb %%dil, %%sil\n\t"
34 "je 4f\n\t"
35 "3:\n\t"
36 "sub $0x20, %0\n\t"
37 /*
38 * We gobble 32byts forward in each loop.
39 */
40 "5:\n\t"
41 "sub $0x20, %0\n\t"
42 "movq 0*8(%1), %4\n\t"
43 "movq 1*8(%1), %5\n\t"
44 "movq 2*8(%1), %6\n\t"
45 "movq 3*8(%1), %7\n\t"
46 "leaq 4*8(%1), %1\n\t"
47
48 "movq %4, 0*8(%2)\n\t"
49 "movq %5, 1*8(%2)\n\t"
50 "movq %6, 2*8(%2)\n\t"
51 "movq %7, 3*8(%2)\n\t"
52 "leaq 4*8(%2), %2\n\t"
53 "jae 5b\n\t"
54 "addq $0x20, %0\n\t"
55 "jmp 1f\n\t"
56 /*
57 * Handle data forward by movsq.
58 */
59 ".p2align 4\n\t"
60 "4:\n\t"
61 "movq %0, %8\n\t"
62 "movq -8(%1, %0), %4\n\t"
63 "lea -8(%2, %0), %5\n\t"
64 "shrq $3, %8\n\t"
65 "rep movsq\n\t"
66 "movq %4, (%5)\n\t"
67 "jmp 13f\n\t"
68 /*
69 * Handle data backward by movsq.
70 */
71 ".p2align 4\n\t"
72 "7:\n\t"
73 "movq %0, %8\n\t"
74 "movq (%1), %4\n\t"
75 "movq %2, %5\n\t"
76 "leaq -8(%1, %0), %1\n\t"
77 "leaq -8(%2, %0), %2\n\t"
78 "shrq $3, %8\n\t"
79 "std\n\t"
80 "rep movsq\n\t"
81 "cld\n\t"
82 "movq %4, (%5)\n\t"
83 "jmp 13f\n\t"
84
85 /*
86 * Start to prepare for backward copy.
87 */
88 ".p2align 4\n\t"
89 "2:\n\t"
90 "cmp $680, %0\n\t"
91 "jb 6f \n\t"
92 "cmp %%dil, %%sil\n\t"
93 "je 7b \n\t"
94 "6:\n\t"
95 /*
96 * Calculate copy position to tail.
97 */
98 "addq %0, %1\n\t"
99 "addq %0, %2\n\t"
100 "subq $0x20, %0\n\t"
101 /*
102 * We gobble 32byts backward in each loop.
103 */
104 "8:\n\t"
105 "subq $0x20, %0\n\t"
106 "movq -1*8(%1), %4\n\t"
107 "movq -2*8(%1), %5\n\t"
108 "movq -3*8(%1), %6\n\t"
109 "movq -4*8(%1), %7\n\t"
110 "leaq -4*8(%1), %1\n\t"
111
112 "movq %4, -1*8(%2)\n\t"
113 "movq %5, -2*8(%2)\n\t"
114 "movq %6, -3*8(%2)\n\t"
115 "movq %7, -4*8(%2)\n\t"
116 "leaq -4*8(%2), %2\n\t"
117 "jae 8b\n\t"
118 /*
119 * Calculate copy position to head.
120 */
121 "addq $0x20, %0\n\t"
122 "subq %0, %1\n\t"
123 "subq %0, %2\n\t"
124 "1:\n\t"
125 "cmpq $16, %0\n\t"
126 "jb 9f\n\t"
127 /*
128 * Move data from 16 bytes to 31 bytes.
129 */
130 "movq 0*8(%1), %4\n\t"
131 "movq 1*8(%1), %5\n\t"
132 "movq -2*8(%1, %0), %6\n\t"
133 "movq -1*8(%1, %0), %7\n\t"
134 "movq %4, 0*8(%2)\n\t"
135 "movq %5, 1*8(%2)\n\t"
136 "movq %6, -2*8(%2, %0)\n\t"
137 "movq %7, -1*8(%2, %0)\n\t"
138 "jmp 13f\n\t"
139 ".p2align 4\n\t"
140 "9:\n\t"
141 "cmpq $8, %0\n\t"
142 "jb 10f\n\t"
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 "movq 0*8(%1), %4\n\t"
147 "movq -1*8(%1, %0), %5\n\t"
148 "movq %4, 0*8(%2)\n\t"
149 "movq %5, -1*8(%2, %0)\n\t"
150 "jmp 13f\n\t"
151 "10:\n\t"
152 "cmpq $4, %0\n\t"
153 "jb 11f\n\t"
154 /*
155 * Move data from 4 bytes to 7 bytes.
156 */
157 "movl (%1), %4d\n\t"
158 "movl -4(%1, %0), %5d\n\t"
159 "movl %4d, (%2)\n\t"
160 "movl %5d, -4(%2, %0)\n\t"
161 "jmp 13f\n\t"
162 "11:\n\t"
163 "cmp $2, %0\n\t"
164 "jb 12f\n\t"
165 /*
166 * Move data from 2 bytes to 3 bytes.
167 */
168 "movw (%1), %4w\n\t"
169 "movw -2(%1, %0), %5w\n\t"
170 "movw %4w, (%2)\n\t"
171 "movw %5w, -2(%2, %0)\n\t"
172 "jmp 13f\n\t"
173 "12:\n\t"
174 "cmp $1, %0\n\t"
175 "jb 13f\n\t"
176 /*
177 * Move data for 1 byte.
178 */
179 "movb (%1), %4b\n\t"
180 "movb %4b, (%2)\n\t"
181 "13:\n\t"
182 : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
183 "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
184 :"0" (count),
185 "1" (src),
186 "2" (dest)
187 :"memory");
188
189 return ret;
190
20} 191}
21EXPORT_SYMBOL(memmove); 192EXPORT_SYMBOL(memmove);
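
The rewritten asm keeps the original function's contract: overlapping regions are handled by choosing the copy direction from the relative position of dest and src, only now in 32-byte chunks (or rep movsq for large, equally aligned buffers). A minimal C sketch of that direction decision, using the same dest/src/count parameters (a sketch of the idea, not the patch's implementation):

	#include <stddef.h>

	static void *memmove_sketch(void *dest, const void *src, size_t count)
	{
		unsigned char *d = dest;
		const unsigned char *s = src;

		if (d < s) {
			/* dest below src: a forward copy never overwrites
			 * source bytes before they have been read. */
			while (count--)
				*d++ = *s++;
		} else {
			/* dest above src: copy backward from the tail. */
			while (count--)
				d[count] = s[count];
		}
		return dest;
	}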
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e8a204..79b0b372d2d0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
229 229
230 spin_lock_irqsave(&pgd_lock, flags); 230 spin_lock_irqsave(&pgd_lock, flags);
231 list_for_each_entry(page, &pgd_list, lru) { 231 list_for_each_entry(page, &pgd_list, lru) {
232 if (!vmalloc_sync_one(page_address(page), address)) 232 spinlock_t *pgt_lock;
233 pmd_t *ret;
234
235 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
236
237 spin_lock(pgt_lock);
238 ret = vmalloc_sync_one(page_address(page), address);
239 spin_unlock(pgt_lock);
240
241 if (!ret)
233 break; 242 break;
234 } 243 }
235 spin_unlock_irqrestore(&pgd_lock, flags); 244 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -251,6 +260,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
251 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 260 if (!(address >= VMALLOC_START && address < VMALLOC_END))
252 return -1; 261 return -1;
253 262
263 WARN_ON_ONCE(in_nmi());
264
254 /* 265 /*
255 * Synchronize this task's top level page-table 266 * Synchronize this task's top level page-table
256 * with the 'reference' page table. 267 * with the 'reference' page table.
@@ -326,29 +337,7 @@ out:
326 337
327void vmalloc_sync_all(void) 338void vmalloc_sync_all(void)
328{ 339{
329 unsigned long address; 340 sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
330
331 for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
332 address += PGDIR_SIZE) {
333
334 const pgd_t *pgd_ref = pgd_offset_k(address);
335 unsigned long flags;
336 struct page *page;
337
338 if (pgd_none(*pgd_ref))
339 continue;
340
341 spin_lock_irqsave(&pgd_lock, flags);
342 list_for_each_entry(page, &pgd_list, lru) {
343 pgd_t *pgd;
344 pgd = (pgd_t *)page_address(page) + pgd_index(address);
345 if (pgd_none(*pgd))
346 set_pgd(pgd, *pgd_ref);
347 else
348 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
349 }
350 spin_unlock_irqrestore(&pgd_lock, flags);
351 }
352} 341}
353 342
354/* 343/*
@@ -369,6 +358,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
369 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 358 if (!(address >= VMALLOC_START && address < VMALLOC_END))
370 return -1; 359 return -1;
371 360
361 WARN_ON_ONCE(in_nmi());
362
372 /* 363 /*
373 * Copy kernel mappings over when needed. This can also 364 * Copy kernel mappings over when needed. This can also
374 * happen within a race in page table update. In the later 365 * happen within a race in page table update. In the later
@@ -894,8 +885,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
894 if (pmd_large(*pmd)) 885 if (pmd_large(*pmd))
895 return spurious_fault_check(error_code, (pte_t *) pmd); 886 return spurious_fault_check(error_code, (pte_t *) pmd);
896 887
888 /*
889 * Note: don't use pte_present() here, since it returns true
890 * if the _PAGE_PROTNONE bit is set. However, this aliases the
 891 * _PAGE_GLOBAL bit, which for kernel pages gives false positives
892 * when CONFIG_DEBUG_PAGEALLOC is used.
893 */
897 pte = pte_offset_kernel(pmd, address); 894 pte = pte_offset_kernel(pmd, address);
898 if (!pte_present(*pte)) 895 if (!(pte_flags(*pte) & _PAGE_PRESENT))
899 return 0; 896 return 0;
900 897
901 ret = spurious_fault_check(error_code, pte); 898 ret = spurious_fault_check(error_code, pte);
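
The new comment in spurious_fault() explains the one-line change under it: pte_present() in this kernel also accepts _PAGE_PROTNONE, and on kernel mappings that bit position doubles as _PAGE_GLOBAL, so a global-but-unmapped page (as CONFIG_DEBUG_PAGEALLOC produces) would wrongly look present. A paraphrase of the helper being avoided, shown for illustration only (the authoritative definition is in <asm/pgtable.h>):

	static inline int pte_present_sketch(pte_t a)
	{
		/* True for _PAGE_PRESENT *or* _PAGE_PROTNONE, which is why the
		 * fault path above tests pte_flags(*pte) & _PAGE_PRESENT directly. */
		return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
	}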
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bca79091b9d6..558f2d332076 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,7 +67,7 @@ static __init void *alloc_low_page(void)
67 panic("alloc_low_page: ran out of memory"); 67 panic("alloc_low_page: ran out of memory");
68 68
69 adr = __va(pfn * PAGE_SIZE); 69 adr = __va(pfn * PAGE_SIZE);
70 memset(adr, 0, PAGE_SIZE); 70 clear_page(adr);
71 return adr; 71 return adr;
72} 72}
73 73
@@ -558,7 +558,7 @@ char swsusp_pg_dir[PAGE_SIZE]
558 558
559static inline void save_pg_dir(void) 559static inline void save_pg_dir(void)
560{ 560{
561 memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); 561 copy_page(swsusp_pg_dir, swapper_pg_dir);
562} 562}
563#else /* !CONFIG_ACPI_SLEEP */ 563#else /* !CONFIG_ACPI_SLEEP */
564static inline void save_pg_dir(void) 564static inline void save_pg_dir(void)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a6674689a20..c55f900fbf89 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -98,6 +98,43 @@ static int __init nonx32_setup(char *str)
98__setup("noexec32=", nonx32_setup); 98__setup("noexec32=", nonx32_setup);
99 99
100/* 100/*
 101 * When memory is added or removed, make sure every process's mm has
 102 * suitable PGD entries in its local top-level (PGD) page.
103 */
104void sync_global_pgds(unsigned long start, unsigned long end)
105{
106 unsigned long address;
107
108 for (address = start; address <= end; address += PGDIR_SIZE) {
109 const pgd_t *pgd_ref = pgd_offset_k(address);
110 unsigned long flags;
111 struct page *page;
112
113 if (pgd_none(*pgd_ref))
114 continue;
115
116 spin_lock_irqsave(&pgd_lock, flags);
117 list_for_each_entry(page, &pgd_list, lru) {
118 pgd_t *pgd;
119 spinlock_t *pgt_lock;
120
121 pgd = (pgd_t *)page_address(page) + pgd_index(address);
122 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
123 spin_lock(pgt_lock);
124
125 if (pgd_none(*pgd))
126 set_pgd(pgd, *pgd_ref);
127 else
128 BUG_ON(pgd_page_vaddr(*pgd)
129 != pgd_page_vaddr(*pgd_ref));
130
131 spin_unlock(pgt_lock);
132 }
133 spin_unlock_irqrestore(&pgd_lock, flags);
134 }
135}
136
137/*
101 * NOTE: This function is marked __ref because it calls __init function 138 * NOTE: This function is marked __ref because it calls __init function
102 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. 139 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
103 */ 140 */
@@ -293,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
293 panic("alloc_low_page: ran out of memory"); 330 panic("alloc_low_page: ran out of memory");
294 331
295 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 332 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
296 memset(adr, 0, PAGE_SIZE); 333 clear_page(adr);
297 *phys = pfn * PAGE_SIZE; 334 *phys = pfn * PAGE_SIZE;
298 return adr; 335 return adr;
299} 336}
@@ -534,11 +571,13 @@ kernel_physical_mapping_init(unsigned long start,
534 unsigned long end, 571 unsigned long end,
535 unsigned long page_size_mask) 572 unsigned long page_size_mask)
536{ 573{
537 574 bool pgd_changed = false;
538 unsigned long next, last_map_addr = end; 575 unsigned long next, last_map_addr = end;
576 unsigned long addr;
539 577
540 start = (unsigned long)__va(start); 578 start = (unsigned long)__va(start);
541 end = (unsigned long)__va(end); 579 end = (unsigned long)__va(end);
580 addr = start;
542 581
543 for (; start < end; start = next) { 582 for (; start < end; start = next) {
544 pgd_t *pgd = pgd_offset_k(start); 583 pgd_t *pgd = pgd_offset_k(start);
@@ -563,7 +602,12 @@ kernel_physical_mapping_init(unsigned long start,
563 spin_lock(&init_mm.page_table_lock); 602 spin_lock(&init_mm.page_table_lock);
564 pgd_populate(&init_mm, pgd, __va(pud_phys)); 603 pgd_populate(&init_mm, pgd, __va(pud_phys));
565 spin_unlock(&init_mm.page_table_lock); 604 spin_unlock(&init_mm.page_table_lock);
605 pgd_changed = true;
566 } 606 }
607
608 if (pgd_changed)
609 sync_global_pgds(addr, end);
610
567 __flush_tlb_all(); 611 __flush_tlb_all();
568 612
569 return last_map_addr; 613 return last_map_addr;
@@ -1003,6 +1047,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
1003 } 1047 }
1004 1048
1005 } 1049 }
1050 sync_global_pgds((unsigned long)start_page, end);
1006 return 0; 1051 return 0;
1007} 1052}
1008 1053
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 970ed579d4e4..52d54bfc1ebb 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -22,7 +22,7 @@
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/mpspec.h> 23#include <asm/mpspec.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/k8.h> 25#include <asm/amd_nb.h>
26 26
27static struct bootnode __initdata nodes[8]; 27static struct bootnode __initdata nodes[8];
28static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; 28static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
@@ -54,8 +54,8 @@ static __init int find_northbridge(void)
54static __init void early_get_boot_cpu_id(void) 54static __init void early_get_boot_cpu_id(void)
55{ 55{
56 /* 56 /*
57 * need to get boot_cpu_id so can use that to create apicid_to_node 57 * need to get the APIC ID of the BSP so can use that to
58 * in k8_scan_nodes() 58 * create apicid_to_node in k8_scan_nodes()
59 */ 59 */
60#ifdef CONFIG_X86_MPPARSE 60#ifdef CONFIG_X86_MPPARSE
61 /* 61 /*
@@ -212,7 +212,7 @@ int __init k8_scan_nodes(void)
212 bits = boot_cpu_data.x86_coreid_bits; 212 bits = boot_cpu_data.x86_coreid_bits;
213 cores = (1<<bits); 213 cores = (1<<bits);
214 apicid_base = 0; 214 apicid_base = 0;
215 /* need to get boot_cpu_id early for system with apicid lifting */ 215 /* get the APIC ID of the BSP early for systems with apicid lifting */
216 early_get_boot_cpu_id(); 216 early_get_boot_cpu_id();
217 if (boot_cpu_physical_apicid > 0) { 217 if (boot_cpu_physical_apicid > 0) {
218 pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); 218 pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b3b531a4f8e5..d87dd6d042d6 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
631 if (!pte) 631 if (!pte)
632 return false; 632 return false;
633 633
634 WARN_ON_ONCE(in_nmi());
635
634 if (error_code & 2) 636 if (error_code & 2)
635 kmemcheck_access(regs, address, KMEMCHECK_WRITE); 637 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
636 else 638 else
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
index 63c19e27aa6f..324aa3f07237 100644
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -9,7 +9,7 @@ static bool opcode_is_prefix(uint8_t b)
9 b == 0xf0 || b == 0xf2 || b == 0xf3 9 b == 0xf0 || b == 0xf2 || b == 0xf3
10 /* Group 2 */ 10 /* Group 2 */
11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
12 || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e 12 || b == 0x64 || b == 0x65
13 /* Group 3 */ 13 /* Group 3 */
14 || b == 0x66 14 || b == 0x66
15 /* Group 4 */ 15 /* Group 4 */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a7bcc23ef96c..4962f1aeda6f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -18,7 +18,7 @@
18#include <asm/dma.h> 18#include <asm/dma.h>
19#include <asm/numa.h> 19#include <asm/numa.h>
20#include <asm/acpi.h> 20#include <asm/acpi.h>
21#include <asm/k8.h> 21#include <asm/amd_nb.h>
22 22
23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
24EXPORT_SYMBOL(node_data); 24EXPORT_SYMBOL(node_data);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..8be8c7d7bc89 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
87#define UNSHARED_PTRS_PER_PGD \ 87#define UNSHARED_PTRS_PER_PGD \
88 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) 88 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
89 89
90static void pgd_ctor(pgd_t *pgd) 90
91static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
92{
93 BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
94 virt_to_page(pgd)->index = (pgoff_t)mm;
95}
96
97struct mm_struct *pgd_page_get_mm(struct page *page)
98{
99 return (struct mm_struct *)page->index;
100}
101
102static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
91{ 103{
92 /* If the pgd points to a shared pagetable level (either the 104 /* If the pgd points to a shared pagetable level (either the
93 ptes in non-PAE, or shared PMD in PAE), then just copy the 105 ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -98,15 +110,13 @@ static void pgd_ctor(pgd_t *pgd)
98 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, 110 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
99 swapper_pg_dir + KERNEL_PGD_BOUNDARY, 111 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
100 KERNEL_PGD_PTRS); 112 KERNEL_PGD_PTRS);
101 paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
102 __pa(swapper_pg_dir) >> PAGE_SHIFT,
103 KERNEL_PGD_BOUNDARY,
104 KERNEL_PGD_PTRS);
105 } 113 }
106 114
107 /* list required to sync kernel mapping updates */ 115 /* list required to sync kernel mapping updates */
108 if (!SHARED_KERNEL_PMD) 116 if (!SHARED_KERNEL_PMD) {
117 pgd_set_mm(pgd, mm);
109 pgd_list_add(pgd); 118 pgd_list_add(pgd);
119 }
110} 120}
111 121
112static void pgd_dtor(pgd_t *pgd) 122static void pgd_dtor(pgd_t *pgd)
@@ -272,7 +282,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
272 */ 282 */
273 spin_lock_irqsave(&pgd_lock, flags); 283 spin_lock_irqsave(&pgd_lock, flags);
274 284
275 pgd_ctor(pgd); 285 pgd_ctor(mm, pgd);
276 pgd_prepopulate_pmd(mm, pgd, pmds); 286 pgd_prepopulate_pmd(mm, pgd, pmds);
277 287
278 spin_unlock_irqrestore(&pgd_lock, flags); 288 spin_unlock_irqrestore(&pgd_lock, flags);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index f9897f7a9ef1..9c0d0d399c30 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -420,9 +420,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
420 return -1; 420 return -1;
421 } 421 }
422 422
423 for_each_node_mask(i, nodes_parsed) 423 for (i = 0; i < num_node_memblks; i++)
424 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 424 e820_register_active_regions(memblk_nodeid[i],
425 nodes[i].end >> PAGE_SHIFT); 425 node_memblk_range[i].start >> PAGE_SHIFT,
426 node_memblk_range[i].end >> PAGE_SHIFT);
427
426 /* for out of order entries in SRAT */ 428 /* for out of order entries in SRAT */
427 sort_node_map(); 429 sort_node_map();
428 if (!nodes_cover_memory(nodes)) { 430 if (!nodes_cover_memory(nodes)) {
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..49358481c733 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
5#include <linux/smp.h> 5#include <linux/smp.h>
6#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/cpu.h>
8 9
9#include <asm/tlbflush.h> 10#include <asm/tlbflush.h>
10#include <asm/mmu_context.h> 11#include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
52 want false sharing in the per cpu data segment. */ 53 want false sharing in the per cpu data segment. */
53static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; 54static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
54 55
56static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
57
55/* 58/*
56 * We cannot call mmdrop() because we are in interrupt context, 59 * We cannot call mmdrop() because we are in interrupt context,
57 * instead update mm->cpu_vm_mask. 60 * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
173 union smp_flush_state *f; 176 union smp_flush_state *f;
174 177
175 /* Caller has disabled preemption */ 178 /* Caller has disabled preemption */
176 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 179 sender = this_cpu_read(tlb_vector_offset);
177 f = &flush_state[sender]; 180 f = &flush_state[sender];
178 181
179 /* 182 /*
@@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
218 flush_tlb_others_ipi(cpumask, mm, va); 221 flush_tlb_others_ipi(cpumask, mm, va);
219} 222}
220 223
224static void __cpuinit calculate_tlb_offset(void)
225{
226 int cpu, node, nr_node_vecs;
227 /*
 228 * tlb_vector_offset is changed for each CPU at runtime, but this
 229 * will not cause inconsistency, as the write is atomic on x86. We
 230 * might see more lock contention for a short time, but once every CPU's
 231 * tlb_vector_offset has been updated, everything goes back to normal.
232 *
233 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
234 * waste some vectors.
235 **/
236 if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
237 nr_node_vecs = 1;
238 else
239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
240
241 for_each_online_node(node) {
242 int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
243 nr_node_vecs;
244 int cpu_offset = 0;
245 for_each_cpu(cpu, cpumask_of_node(node)) {
246 per_cpu(tlb_vector_offset, cpu) = node_offset +
247 cpu_offset;
248 cpu_offset++;
249 cpu_offset = cpu_offset % nr_node_vecs;
250 }
251 }
252}
253
254static int tlb_cpuhp_notify(struct notifier_block *n,
255 unsigned long action, void *hcpu)
256{
257 switch (action & 0xf) {
258 case CPU_ONLINE:
259 case CPU_DEAD:
260 calculate_tlb_offset();
261 }
262 return NOTIFY_OK;
263}
264
221static int __cpuinit init_smp_flush(void) 265static int __cpuinit init_smp_flush(void)
222{ 266{
223 int i; 267 int i;
@@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void)
225 for (i = 0; i < ARRAY_SIZE(flush_state); i++) 269 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226 raw_spin_lock_init(&flush_state[i].tlbstate_lock); 270 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227 271
272 calculate_tlb_offset();
273 hotcpu_notifier(tlb_cpuhp_notify, 0);
228 return 0; 274 return 0;
229} 275}
230core_initcall(init_smp_flush); 276core_initcall(init_smp_flush);
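
A worked example of the distribution calculate_tlb_offset() sets up, assuming NUM_INVALIDATE_TLB_VECTORS is 8 (its value in this kernel) and three online nodes: nr_node_vecs = 8 / 3 = 2, so node 0 owns vectors 0-1, node 1 owns 2-3, node 2 owns 4-5, and vectors 6-7 go unused (the waste the comment warns about). CPUs within a node are then assigned round-robin across their node's two vectors, so contention on the corresponding flush_state[] locks stays within a node instead of bouncing across the machine.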
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 3855096c59b8..2d49d4e19a36 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/stacktrace.h> 16#include <asm/stacktrace.h>
17#include <linux/compat.h>
17 18
18static void backtrace_warning_symbol(void *data, char *msg, 19static void backtrace_warning_symbol(void *data, char *msg,
19 unsigned long symbol) 20 unsigned long symbol)
@@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = {
48 .walk_stack = print_context_stack, 49 .walk_stack = print_context_stack,
49}; 50};
50 51
51struct frame_head { 52#ifdef CONFIG_COMPAT
52 struct frame_head *bp; 53static struct stack_frame_ia32 *
53 unsigned long ret; 54dump_user_backtrace_32(struct stack_frame_ia32 *head)
54} __attribute__((packed));
55
56static struct frame_head *dump_user_backtrace(struct frame_head *head)
57{ 55{
58 struct frame_head bufhead[2]; 56 struct stack_frame_ia32 bufhead[2];
57 struct stack_frame_ia32 *fp;
59 58
60 /* Also check accessibility of one struct frame_head beyond */ 59 /* Also check accessibility of one struct frame_head beyond */
61 if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) 60 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
@@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head)
63 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) 62 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
64 return NULL; 63 return NULL;
65 64
66 oprofile_add_trace(bufhead[0].ret); 65 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
66
67 oprofile_add_trace(bufhead[0].return_address);
68
69 /* frame pointers should strictly progress back up the stack
70 * (towards higher addresses) */
71 if (head >= fp)
72 return NULL;
73
74 return fp;
75}
76
77static inline int
78x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
79{
80 struct stack_frame_ia32 *head;
81
82 /* User process is 32-bit */
83 if (!current || !test_thread_flag(TIF_IA32))
84 return 0;
85
86 head = (struct stack_frame_ia32 *) regs->bp;
87 while (depth-- && head)
88 head = dump_user_backtrace_32(head);
89
90 return 1;
91}
92
93#else
94static inline int
95x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
96{
97 return 0;
98}
99#endif /* CONFIG_COMPAT */
100
101static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
102{
103 struct stack_frame bufhead[2];
104
105 /* Also check accessibility of one struct stack_frame beyond */
106 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
107 return NULL;
108 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
109 return NULL;
110
111 oprofile_add_trace(bufhead[0].return_address);
67 112
68 /* frame pointers should strictly progress back up the stack 113 /* frame pointers should strictly progress back up the stack
69 * (towards higher addresses) */ 114 * (towards higher addresses) */
70 if (head >= bufhead[0].bp) 115 if (head >= bufhead[0].next_frame)
71 return NULL; 116 return NULL;
72 117
73 return bufhead[0].bp; 118 return bufhead[0].next_frame;
74} 119}
75 120
76void 121void
77x86_backtrace(struct pt_regs * const regs, unsigned int depth) 122x86_backtrace(struct pt_regs * const regs, unsigned int depth)
78{ 123{
79 struct frame_head *head = (struct frame_head *)frame_pointer(regs); 124 struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
80 125
81 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
82 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
@@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
86 return; 131 return;
87 } 132 }
88 133
134 if (x86_backtrace_32(regs, depth))
135 return;
136
89 while (depth-- && head) 137 while (depth-- && head)
90 head = dump_user_backtrace(head); 138 head = dump_user_backtrace(head);
91} 139}
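
The reworked walker leans on the generic frame-layout types from <asm/stacktrace.h> rather than the local frame_head it deletes. A sketch of the layouts it assumes, using the field names seen above (illustrative; the authoritative definitions live in that header):

	struct stack_frame {			/* native 64-bit frames */
		struct stack_frame *next_frame;
		unsigned long return_address;
	};

	struct stack_frame_ia32 {		/* compat 32-bit user frames */
		u32 next_frame;			/* 32-bit pointer, widened with compat_ptr() */
		u32 return_address;
	};

The compat path only runs when the current task has TIF_IA32 set, so native 64-bit tasks still take the 64-bit walker.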
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f1575c9a2572..bd1489c3ce09 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type)
695 return 1; 695 return 1;
696} 696}
697 697
698/* in order to get sysfs right */
699static int using_nmi;
700
701int __init op_nmi_init(struct oprofile_operations *ops) 698int __init op_nmi_init(struct oprofile_operations *ops)
702{ 699{
703 __u8 vendor = boot_cpu_data.x86_vendor; 700 __u8 vendor = boot_cpu_data.x86_vendor;
@@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
705 char *cpu_type = NULL; 702 char *cpu_type = NULL;
706 int ret = 0; 703 int ret = 0;
707 704
708 using_nmi = 0;
709
710 if (!cpu_has_apic) 705 if (!cpu_has_apic)
711 return -ENODEV; 706 return -ENODEV;
712 707
@@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops)
790 if (ret) 785 if (ret)
791 return ret; 786 return ret;
792 787
793 using_nmi = 1;
794 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 788 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
795 return 0; 789 return 0;
796} 790}
797 791
798void op_nmi_exit(void) 792void op_nmi_exit(void)
799{ 793{
800 if (using_nmi) 794 exit_sysfs();
801 exit_sysfs();
802} 795}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b67a6b5aa8d4..42fb46f83883 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -64,15 +64,22 @@ static u64 ibs_op_ctl;
64 * IBS cpuid feature detection 64 * IBS cpuid feature detection
65 */ 65 */
66 66
67#define IBS_CPUID_FEATURES 0x8000001b 67#define IBS_CPUID_FEATURES 0x8000001b
68 68
69/* 69/*
70 * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but 70 * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
71 * bit 0 is used to indicate the existence of IBS. 71 * bit 0 is used to indicate the existence of IBS.
72 */ 72 */
73#define IBS_CAPS_AVAIL (1LL<<0) 73#define IBS_CAPS_AVAIL (1U<<0)
74#define IBS_CAPS_RDWROPCNT (1LL<<3) 74#define IBS_CAPS_RDWROPCNT (1U<<3)
75#define IBS_CAPS_OPCNT (1LL<<4) 75#define IBS_CAPS_OPCNT (1U<<4)
76
77/*
78 * IBS APIC setup
79 */
80#define IBSCTL 0x1cc
81#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
82#define IBSCTL_LVT_OFFSET_MASK 0x0F
76 83
77/* 84/*
78 * IBS randomization macros 85 * IBS randomization macros
@@ -266,6 +273,74 @@ static void op_amd_stop_ibs(void)
266 wrmsrl(MSR_AMD64_IBSOPCTL, 0); 273 wrmsrl(MSR_AMD64_IBSOPCTL, 0);
267} 274}
268 275
276static inline int eilvt_is_available(int offset)
277{
278 /* check if we may assign a vector */
279 return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
280}
281
282static inline int ibs_eilvt_valid(void)
283{
284 u64 val;
285 int offset;
286
287 rdmsrl(MSR_AMD64_IBSCTL, val);
288 if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
289 pr_err(FW_BUG "cpu %d, invalid IBS "
290 "interrupt offset %d (MSR%08X=0x%016llx)",
291 smp_processor_id(), offset,
292 MSR_AMD64_IBSCTL, val);
293 return 0;
294 }
295
296 offset = val & IBSCTL_LVT_OFFSET_MASK;
297
298 if (eilvt_is_available(offset))
299 return !0;
300
301 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
302 "not available (MSR%08X=0x%016llx)",
303 smp_processor_id(), offset,
304 MSR_AMD64_IBSCTL, val);
305
306 return 0;
307}
308
309static inline int get_ibs_offset(void)
310{
311 u64 val;
312
313 rdmsrl(MSR_AMD64_IBSCTL, val);
314 if (!(val & IBSCTL_LVT_OFFSET_VALID))
315 return -EINVAL;
316
317 return val & IBSCTL_LVT_OFFSET_MASK;
318}
319
320static void setup_APIC_ibs(void)
321{
322 int offset;
323
324 offset = get_ibs_offset();
325 if (offset < 0)
326 goto failed;
327
328 if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
329 return;
330failed:
331 pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n",
332 smp_processor_id());
333}
334
335static void clear_APIC_ibs(void)
336{
337 int offset;
338
339 offset = get_ibs_offset();
340 if (offset >= 0)
341 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
342}
343
269#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 344#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
270 345
271static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, 346static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
@@ -376,13 +451,13 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
376 } 451 }
377 452
378 if (ibs_caps) 453 if (ibs_caps)
379 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); 454 setup_APIC_ibs();
380} 455}
381 456
382static void op_amd_cpu_shutdown(void) 457static void op_amd_cpu_shutdown(void)
383{ 458{
384 if (ibs_caps) 459 if (ibs_caps)
385 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); 460 clear_APIC_ibs();
386} 461}
387 462
388static int op_amd_check_ctrs(struct pt_regs * const regs, 463static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -445,16 +520,11 @@ static void op_amd_stop(struct op_msrs const * const msrs)
445 op_amd_stop_ibs(); 520 op_amd_stop_ibs();
446} 521}
447 522
448static int __init_ibs_nmi(void) 523static int setup_ibs_ctl(int ibs_eilvt_off)
449{ 524{
450#define IBSCTL_LVTOFFSETVAL (1 << 8)
451#define IBSCTL 0x1cc
452 struct pci_dev *cpu_cfg; 525 struct pci_dev *cpu_cfg;
453 int nodes; 526 int nodes;
454 u32 value = 0; 527 u32 value = 0;
455 u8 ibs_eilvt_off;
456
457 ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
458 528
459 nodes = 0; 529 nodes = 0;
460 cpu_cfg = NULL; 530 cpu_cfg = NULL;
@@ -466,21 +536,60 @@ static int __init_ibs_nmi(void)
466 break; 536 break;
467 ++nodes; 537 ++nodes;
468 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off 538 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
469 | IBSCTL_LVTOFFSETVAL); 539 | IBSCTL_LVT_OFFSET_VALID);
470 pci_read_config_dword(cpu_cfg, IBSCTL, &value); 540 pci_read_config_dword(cpu_cfg, IBSCTL, &value);
471 if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { 541 if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
472 pci_dev_put(cpu_cfg); 542 pci_dev_put(cpu_cfg);
473 printk(KERN_DEBUG "Failed to setup IBS LVT offset, " 543 printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
474 "IBSCTL = 0x%08x", value); 544 "IBSCTL = 0x%08x\n", value);
475 return 1; 545 return -EINVAL;
476 } 546 }
477 } while (1); 547 } while (1);
478 548
479 if (!nodes) { 549 if (!nodes) {
480 printk(KERN_DEBUG "No CPU node configured for IBS"); 550 printk(KERN_DEBUG "No CPU node configured for IBS\n");
481 return 1; 551 return -ENODEV;
552 }
553
554 return 0;
555}
556
557static int force_ibs_eilvt_setup(void)
558{
559 int i;
560 int ret;
561
562 /* find the next free available EILVT entry */
563 for (i = 1; i < 4; i++) {
564 if (!eilvt_is_available(i))
565 continue;
566 ret = setup_ibs_ctl(i);
567 if (ret)
568 return ret;
569 return 0;
482 } 570 }
483 571
572 printk(KERN_DEBUG "No EILVT entry available\n");
573
574 return -EBUSY;
575}
576
577static int __init_ibs_nmi(void)
578{
579 int ret;
580
581 if (ibs_eilvt_valid())
582 return 0;
583
584 ret = force_ibs_eilvt_setup();
585 if (ret)
586 return ret;
587
588 if (!ibs_eilvt_valid())
589 return -EFAULT;
590
591 pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
592
484 return 0; 593 return 0;
485} 594}
486 595
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index b34815408f58..13700ec8e2e4 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -304,7 +304,7 @@ static struct pci_raw_ops pci_olpc_conf = {
304 304
305int __init pci_olpc_init(void) 305int __init pci_olpc_init(void)
306{ 306{
307 printk(KERN_INFO "PCI: Using configuration type OLPC\n"); 307 printk(KERN_INFO "PCI: Using configuration type OLPC XO-1\n");
308 raw_pci_ops = &pci_olpc_conf; 308 raw_pci_ops = &pci_olpc_conf;
309 is_lx = is_geode_lx(); 309 is_lx = is_geode_lx();
310 return 0; 310 return 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 42086ac406af..b2363fcbcd0f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1969,7 +1969,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1969 .alloc_pte = xen_alloc_pte_init, 1969 .alloc_pte = xen_alloc_pte_init,
1970 .release_pte = xen_release_pte_init, 1970 .release_pte = xen_release_pte_init,
1971 .alloc_pmd = xen_alloc_pmd_init, 1971 .alloc_pmd = xen_alloc_pmd_init,
1972 .alloc_pmd_clone = paravirt_nop,
1973 .release_pmd = xen_release_pmd_init, 1972 .release_pmd = xen_release_pmd_init,
1974 1973
1975#ifdef CONFIG_X86_64 1974#ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index a013ec9d0c54..22471001b74c 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -5,6 +5,7 @@
5 5
6#include <asm/xen/hypervisor.h> 6#include <asm/xen/hypervisor.h>
7#include <xen/xen.h> 7#include <xen/xen.h>
8#include <asm/iommu_table.h>
8 9
9int xen_swiotlb __read_mostly; 10int xen_swiotlb __read_mostly;
10 11
@@ -56,3 +57,7 @@ void __init pci_xen_swiotlb_init(void)
56 dma_ops = &xen_swiotlb_dma_ops; 57 dma_ops = &xen_swiotlb_dma_ops;
57 } 58 }
58} 59}
60IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
61 0,
62 pci_xen_swiotlb_init,
63 0);
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index c64a5d387de5..87508886cbbd 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -92,7 +92,7 @@ int show_interrupts(struct seq_file *p, void *v)
92 for_each_online_cpu(j) 92 for_each_online_cpu(j)
93 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 93 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
94#endif 94#endif
95 seq_printf(p, " %14s", irq_desc[i].chip->typename); 95 seq_printf(p, " %14s", irq_desc[i].chip->name);
96 seq_printf(p, " %s", action->name); 96 seq_printf(p, " %s", action->name);
97 97
98 for (action=action->next; action; action = action->next) 98 for (action=action->next; action; action = action->next)